thorns 5.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.thornsignore +581 -0
- package/README.md +175 -0
- package/advanced-metrics.js +203 -0
- package/analyzer.js +257 -0
- package/compact-formatter.js +960 -0
- package/dependency-analyzer.js +252 -0
- package/ignore-parser.js +150 -0
- package/index.js +6 -0
- package/lib.js +537 -0
- package/one-liner.sh +24 -0
- package/package.json +71 -0
- package/queries.js +102 -0
- package/run.sh +81 -0
package/README.md
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# Thorns - Ultra-Compact Codebase Intelligence
|
|
2
|
+
|
|
3
|
+
Cross-platform codebase analysis using tree-sitter for maximum insight with minimal output.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Cross-platform**: WSL, Linux, Windows, macOS (arm64/x64)
|
|
8
|
+
- **12 languages**: JavaScript, TypeScript, Python, Rust, Go, C/C++, Java, C#, Ruby, PHP, JSON
|
|
9
|
+
- **Dependency graph**: File-level imports, orphans, circular deps
|
|
10
|
+
- **Code duplication**: AST-based clone detection
|
|
11
|
+
- **Coupling metrics**: Most connected files
|
|
12
|
+
- **File sizes**: Distribution and largest files
|
|
13
|
+
- **Identifier usage**: Most common variables/functions
|
|
14
|
+
- **Complexity hotspots**: High cx/depth files
|
|
15
|
+
- **Ultra-comprehensive ignoring**: Auto-loads .gitignore, .dockerignore, .npmignore + 200+ built-in patterns
|
|
16
|
+
- **Smart filtering**: Ignores node_modules, target, vendor, dist, build, .cache, etc. across all languages
|
|
17
|
+
- **Ultra-compact**: Zero unnecessary tokens
|
|
18
|
+
- **Fast**: Native parsers, ~1000 files/sec, skips 96%+ of irrelevant files
|
|
19
|
+
|
|
20
|
+
## Usage
|
|
21
|
+
|
|
22
|
+
### Quick Start
|
|
23
|
+
|
|
24
|
+
Analyze current directory:
|
|
25
|
+
```bash
|
|
26
|
+
bunx mcp-thorns
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Analyze specific directory:
|
|
30
|
+
```bash
|
|
31
|
+
bunx mcp-thorns /path/to/codebase
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
### Install Globally with Bun
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
bun add -g mcp-thorns
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Then use:
|
|
41
|
+
```bash
|
|
42
|
+
thorns /path/to/codebase
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### Programmatic API
|
|
46
|
+
|
|
47
|
+
```javascript
|
|
48
|
+
import { analyze } from 'mcp-thorns';
|
|
49
|
+
|
|
50
|
+
// Get ultra-compact analysis as a string
|
|
51
|
+
const output = analyze('./path/to/codebase');
|
|
52
|
+
console.log(output);
|
|
53
|
+
|
|
54
|
+
// Or use the raw functions for custom formatting
|
|
55
|
+
import { analyzeCodebase, formatUltraCompact } from 'mcp-thorns';
|
|
56
|
+
|
|
57
|
+
const data = analyzeCodebase('./path/to/codebase');
|
|
58
|
+
const formatted = formatUltraCompact(data);
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Output Format
|
|
62
|
+
|
|
63
|
+
Ultra-compact cheat sheet with maximum information density:
|
|
64
|
+
|
|
65
|
+
```
|
|
66
|
+
━━━ 47f 8.5kL 44fn 5cls cx:7.9 d:21.7 ━━━
|
|
67
|
+
JA 97% 40f 8.3kL 44fn 5c 575i 96e cx:7.9
|
|
68
|
+
JS 3% 7f 214L 0fn 0c 0i 0e cx:0
|
|
69
|
+
━━━ fn ━━━
|
|
70
|
+
7× Ja:main(1)
|
|
71
|
+
6× Ja:uuid(1)
|
|
72
|
+
5× Ja:createApp(1)
|
|
73
|
+
4× Ja:verifyApps(1)
|
|
74
|
+
3× Ja:processNextApp(1)
|
|
75
|
+
━━━ cls ━━━
|
|
76
|
+
2× Ja:StatelessMCPTools
|
|
77
|
+
2× Ja:ValidationSystem
|
|
78
|
+
━━━ imports ━━━
|
|
79
|
+
2× import { CallToolRequestSchema...
|
|
80
|
+
1× import WebSocket from 'ws';
|
|
81
|
+
━━━ calls ━━━
|
|
82
|
+
504× console.log
|
|
83
|
+
63× ws.on
|
|
84
|
+
51× setTimeout
|
|
85
|
+
━━━ ⚠ hotspots ━━━
|
|
86
|
+
cx:57 d:19 src/validation-system.js
|
|
87
|
+
cx:20 d:28 src/stateless-mcp-tools-basic.js
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
**Legend:**
|
|
91
|
+
- Abbreviations: `f`=files `L`=lines `fn`=functions `cls`=classes `i`=imports `e`=exports `cx`=complexity `d`=AST-depth `(N)`=param-count
|
|
92
|
+
- Issues: `orph`=orphaned-files `dup`=duplicate-code `circ`=circular-deps `in/out`=dependency-coupling
|
|
93
|
+
- `TOTALS`: Total files, lines, functions, classes, average complexity, average depth | issue counts
|
|
94
|
+
- Language rows: % of codebase, file/line/function/class/import/export counts, avg complexity
|
|
95
|
+
- `TOP-FUNCTIONS(most-defined)`: Most common function signatures (count × lang : signature)
|
|
96
|
+
- `TOP-CLASSES(most-defined)`: Most common classes
|
|
97
|
+
- `TOP-IMPORTS(common-deps)`: Most frequent imports
|
|
98
|
+
- `TOP-CALLS(frequent-invocations)`: Most called functions/APIs
|
|
99
|
+
- `HOTSPOTS(complex-files)`: Complexity hotspots - refactor candidates (cx=complexity, d=depth)
|
|
100
|
+
- `ORPHANS(unused-or-entries)`: Files not imported anywhere - potential dead code or entry points
|
|
101
|
+
- `COUPLING(central-files)`: Files with most dependencies - central hubs, refactor candidates (in←imports, out→uses)
|
|
102
|
+
- `DUPLICATES(code-clones)`: AST-based structural clones - consolidation candidates (count × hash : files)
|
|
103
|
+
- `CIRCULAR-DEPS(import-cycles)`: Import cycles - architecture issues
|
|
104
|
+
- `LARGEST-FILES(split-candidates)`: Largest files - maintainability risk
|
|
105
|
+
- `FILE-SIZE-DISTRIBUTION`: File size distribution by line count
|
|
106
|
+
- `TOP-IDENTIFIERS(common-names)`: Most used variable names in codebase
|
|
107
|
+
|
|
108
|
+
## Supported Languages
|
|
109
|
+
|
|
110
|
+
- JavaScript (.js, .mjs, .cjs, .jsx)
|
|
111
|
+
- TypeScript (.ts, .tsx)
|
|
112
|
+
- Python (.py)
|
|
113
|
+
- Rust (.rs)
|
|
114
|
+
- Go (.go)
|
|
115
|
+
- C (.c, .h)
|
|
116
|
+
- C++ (.cpp, .cc, .cxx, .hpp)
|
|
117
|
+
- Java (.java)
|
|
118
|
+
- C# (.cs)
|
|
119
|
+
- Ruby (.rb)
|
|
120
|
+
- PHP (.php)
|
|
121
|
+
- JSON (.json)
|
|
122
|
+
|
|
123
|
+
## Ignored Directories & Files
|
|
124
|
+
|
|
125
|
+
Automatically skips 200+ patterns including:
|
|
126
|
+
- **VCS**: `.git`, `.svn`, `.hg`, `CVS`
|
|
127
|
+
- **Build artifacts**: `dist`, `build`, `out`, `target`, `vendor`
|
|
128
|
+
- **Language-specific**: `node_modules`, `__pycache__`, `.gradle`, `Pods`, etc.
|
|
129
|
+
- **Caches**: `.cache`, `.next`, `.nuxt`, `.parcel-cache`, `.vite`, `.turbo`
|
|
130
|
+
- **IDEs**: `.vscode`, `.idea`, `.vs`, `.sublime-*`
|
|
131
|
+
- **Test & coverage**: `.pytest_cache`, `.nyc_output`, `coverage`, `htmlcov`
|
|
132
|
+
- **Home directories**: `~/.cache`, `~/.config`, `~/.local`, `~/.npm`, `~/.cargo`, etc. (when analyzing subdirs)
|
|
133
|
+
- **Lock files**: `bun.lockb`, `package-lock.json`, `yarn.lock`, `pnpm-lock.yaml`
|
|
134
|
+
- **Custom patterns**: Loads and merges `.gitignore`, `.dockerignore`, `.npmignore`, `.thornsignore`
|
|
135
|
+
|
|
136
|
+
Thorns prioritizes speed by ignoring 96%+ of non-code files.
|
|
137
|
+
|
|
138
|
+
## Requirements
|
|
139
|
+
|
|
140
|
+
- **Runtime**: Bun >= 1.0
|
|
141
|
+
- **Platform**: Linux, macOS, Windows (WSL2), Docker
|
|
142
|
+
- Prebuilt binaries download automatically for supported platforms
|
|
143
|
+
|
|
144
|
+
## Compatibility
|
|
145
|
+
|
|
146
|
+
✅ **Linux**: Ubuntu, Debian, Alpine, Fedora, Arch
|
|
147
|
+
✅ **macOS**: Intel & Apple Silicon (arm64)
|
|
148
|
+
✅ **Windows**: WSL2, Git Bash, PowerShell
|
|
149
|
+
✅ **Docker**: All Linux images (Alpine, Debian, Ubuntu)
|
|
150
|
+
✅ **CI/CD**: GitHub Actions, GitLab CI, Jenkins, CircleCI
|
|
151
|
+
✅ **Cloud**: AWS Lambda (with Node/Bun layer), GCP Cloud Functions
|
|
152
|
+
|
|
153
|
+
## Execution Methods
|
|
154
|
+
|
|
155
|
+
| Method | Best For | Requirements |
|
|
156
|
+
|--------|----------|--------------|
|
|
157
|
+
| bunx | Quick analysis | `bun` |
|
|
158
|
+
| Bun global | Repeated use | `bun` |
|
|
159
|
+
| Programmatic | Integration | Bun |
|
|
160
|
+
|
|
161
|
+
## How It Works
|
|
162
|
+
|
|
163
|
+
1. Walks directory tree, filtering by extension
|
|
164
|
+
2. Parses each file with tree-sitter
|
|
165
|
+
3. Analyzes AST for functions, classes, imports, exports, complexity
|
|
166
|
+
4. Aggregates statistics by language
|
|
167
|
+
5. Outputs compact summary
|
|
168
|
+
|
|
169
|
+
## Performance
|
|
170
|
+
|
|
171
|
+
- Skips files > 1MB
|
|
172
|
+
- Uses native parsers (not WASM)
|
|
173
|
+
- Minimal memory footprint
|
|
174
|
+
- Processes ~1000 files/second on modern hardware
|
|
175
|
+
# Triggered npm publishing
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
import { createHash } from 'crypto';
|
|
2
|
+
|
|
3
|
+
/**
 * Collect identifier, function-size, parameter-count and nesting metrics for one file.
 *
 * @param {object} tree - Parsed tree; only `tree.rootNode` is read. Every node is
 *   expected to expose `type`, `text` and `children`.
 * @param {string} sourceCode - Raw file text, used for blank/comment line counts.
 * @returns {object} Metrics with `identifiers` (Map of name -> occurrences),
 *   `functionLengths`, `functionParams`, `nestingDepths` (one entry per matched
 *   function), `commentLines`, `blankLines`, and the outlier lists
 *   `longFunctions` (> 50 body lines), `deeplyNested` (depth > 5) and
 *   `manyParams` (> 5 parameters).
 */
export function extractAdvancedMetrics(tree, sourceCode) {
  const metrics = {
    identifiers: new Map(),
    functionLengths: [],
    functionParams: [],
    nestingDepths: [],
    commentLines: 0,
    blankLines: 0,
    longFunctions: [],
    deeplyNested: [],
    manyParams: []
  };

  const lines = sourceCode.split('\n');
  metrics.blankLines = lines.filter(l => l.trim() === '').length;
  // Line-prefix heuristic: a line counts as a comment when it starts with a
  // common comment marker after trimming.
  metrics.commentLines = lines.filter(l => {
    const t = l.trim();
    return t.startsWith('//') || t.startsWith('#') || t.startsWith('/*') || t.startsWith('*');
  }).length;

  // Height of the subtree rooted at `node` (0 for a leaf).
  function getNodeDepth(node, depth = 0) {
    let maxDepth = depth;
    for (const child of node.children) {
      maxDepth = Math.max(maxDepth, getNodeDepth(child, depth + 1));
    }
    return maxDepth;
  }

  // Number of parameters declared by the function node itself.
  // Looks only at the function's own parameter-list child (the first direct
  // child whose type mentions "param", skipping generic type-parameter lists)
  // and counts its entries. Punctuation tokens such as "(", "," and ")" have
  // types without word characters and are ignored, as are comments.
  // NOTE(review): grammars that group several names in one declaration
  // (e.g. `a, b int`) will be counted per declaration, not per name.
  function countParams(node) {
    const paramList = node.children.find(
      child => child.type.includes('param') && !child.type.startsWith('type_')
    );
    if (!paramList) return 0;
    return paramList.children.filter(
      child => /\w/.test(child.type) && !child.type.includes('comment')
    ).length;
  }

  // First direct child that looks like a body/block; falls back to the node itself.
  function getFunctionBody(node) {
    for (const child of node.children) {
      if (child.type === 'block' || child.type === 'statement_block' ||
          child.type === 'body' || child.type.includes('body')) {
        return child;
      }
    }
    return node;
  }

  function traverse(node) {
    const type = node.type;

    // Track identifiers (very long names are skipped)
    if (type === 'identifier' || type === 'property_identifier' ||
        type === 'type_identifier' || type === 'field_identifier') {
      const name = node.text;
      if (name && name.length < 50) {
        metrics.identifiers.set(name, (metrics.identifiers.get(name) || 0) + 1);
      }
    }

    // Track function metrics
    if (type.includes('function') && type.includes('declaration') ||
        type === 'method_definition' || type === 'function_item') {
      const body = getFunctionBody(node);
      const bodyLines = body.text.split('\n').length;
      const params = countParams(node);
      const nestDepth = getNodeDepth(body);

      metrics.functionLengths.push(bodyLines);
      metrics.functionParams.push(params);
      metrics.nestingDepths.push(nestDepth);

      if (bodyLines > 50) {
        metrics.longFunctions.push({ lines: bodyLines, text: node.text.slice(0, 100) });
      }
      if (nestDepth > 5) {
        metrics.deeplyNested.push({ depth: nestDepth, text: node.text.slice(0, 100) });
      }
      if (params > 5) {
        metrics.manyParams.push({ params, text: node.text.slice(0, 100) });
      }
    }

    for (const child of node.children) {
      traverse(child);
    }
  }

  traverse(tree.rootNode);
  return metrics;
}
|
|
97
|
+
|
|
98
|
+
/**
 * Group functions that share a structural hash and report the largest groups.
 *
 * @param {Object<string, {functionHashes?: Object<string, string>}>} fileMetrics -
 *   Per-file metrics keyed by file name; files without `functionHashes` (or with
 *   a null entry) are skipped. A null/undefined argument yields an empty result.
 * @param {{maxGroups?: number, maxInstances?: number}} [options] - Output limits.
 *   `maxGroups` caps the number of duplicate groups returned (default 10);
 *   `maxInstances` caps the instances listed per group (default 5).
 * @returns {Array<{hash: string, count: number, instances: Array<{file: string, func: string}>}>}
 *   Groups with more than one member, sorted by descending `count`. `count` is
 *   the full group size even when `instances` is truncated.
 */
export function detectDuplication(fileMetrics, { maxGroups = 10, maxInstances = 5 } = {}) {
  const byHash = new Map();

  for (const [file, data] of Object.entries(fileMetrics ?? {})) {
    if (!data?.functionHashes) continue;

    for (const [funcSig, hash] of Object.entries(data.functionHashes)) {
      let group = byHash.get(hash);
      if (!group) {
        group = [];
        byHash.set(hash, group);
      }
      group.push({ file, func: funcSig });
    }
  }

  const duplicates = [];
  for (const [hash, instances] of byHash) {
    if (instances.length > 1) {
      duplicates.push({
        hash,
        count: instances.length,
        instances: instances.slice(0, maxInstances)
      });
    }
  }

  return duplicates.sort((a, b) => b.count - a.count).slice(0, maxGroups);
}
|
|
125
|
+
|
|
126
|
+
/**
 * Compute a short structural fingerprint of a syntax subtree.
 *
 * Children whose type contains "identifier" or "comment" are skipped, so two
 * subtrees that differ only in names or comments produce the same value.
 * Each retained node contributes its type and the number of retained
 * children; recording the arity keeps differently shaped trees with the same
 * pre-order type sequence (e.g. a(b(c)) vs a(b, c)) from colliding.
 *
 * @param {object} node - Subtree root exposing `type` and `children`.
 * @returns {string} First 8 hex characters of the MD5 digest of the encoded
 *   structure. Used as a similarity key, not for security.
 */
export function hashFunction(node) {
  const structure = [];

  function traverse(n) {
    const kept = n.children.filter(
      child => !child.type.includes('identifier') && !child.type.includes('comment')
    );
    structure.push([n.type, kept.length]);
    for (const child of kept) {
      traverse(child);
    }
  }

  traverse(node);
  // JSON encoding keeps the serialization unambiguous even when a node type is
  // itself a punctuation token.
  return createHash('md5').update(JSON.stringify(structure)).digest('hex').slice(0, 8);
}
|
|
142
|
+
|
|
143
|
+
/**
 * Find import cycles in a dependency graph with a depth-first search.
 *
 * @param {{nodes: Map<*, {importsFrom: Iterable<*>}>}} graph - `graph.nodes`
 *   maps each node key to an object whose `importsFrom` lists the keys it
 *   depends on. Dependencies with no entry in `graph.nodes` are treated as leaves.
 * @param {number} [maxCycles=5] - Maximum number of cycles to report.
 * @returns {Array<Array<*>>} Up to `maxCycles` cycles in discovery order. Each
 *   cycle is a path that starts and ends with the same key, e.g. ['a', 'b', 'a'].
 */
export function detectCircularDeps(graph, maxCycles = 5) {
  const cycles = [];
  const visited = new Set();
  // One shared stack for the current DFS path, plus each key's position on it,
  // so a back edge is resolved without copying or scanning the path.
  const path = [];
  const indexOnPath = new Map();

  function dfs(node) {
    // Cycles beyond the limit are never returned, so stop exploring.
    if (cycles.length >= maxCycles) return;

    const cycleStart = indexOnPath.get(node);
    if (cycleStart !== undefined) {
      cycles.push(path.slice(cycleStart).concat(node));
      return;
    }

    if (visited.has(node)) return;

    indexOnPath.set(node, path.length);
    path.push(node);

    const nodeData = graph.nodes.get(node);
    if (nodeData) {
      for (const dep of nodeData.importsFrom) {
        dfs(dep);
      }
    }

    path.pop();
    indexOnPath.delete(node);
    visited.add(node);
  }

  for (const node of graph.nodes.keys()) {
    if (cycles.length >= maxCycles) break;
    if (!visited.has(node)) {
      dfs(node);
    }
  }

  return cycles.slice(0, maxCycles);
}
|
|
181
|
+
|
|
182
|
+
/**
 * Summarize file sizes: the largest files and a line-count histogram.
 *
 * @param {Object<string, {loc?: number}>} fileMetrics - Per-file metrics keyed
 *   by file name. Entries that are null or have a falsy `loc` are ignored; a
 *   null/undefined argument yields an empty summary.
 * @returns {{largest: Array<{file: string, lines: number}>, distribution: {tiny: number, small: number, medium: number, large: number, huge: number}}}
 *   `largest` holds the ten biggest files in descending order. Buckets are
 *   tiny (< 50 lines), small (50-199), medium (200-499), large (500-999) and
 *   huge (>= 1000).
 */
export function analyzeFileSizes(fileMetrics) {
  const sizes = [];
  const distribution = { tiny: 0, small: 0, medium: 0, large: 0, huge: 0 };

  for (const [file, data] of Object.entries(fileMetrics ?? {})) {
    const lines = data?.loc;
    if (!lines) continue;

    sizes.push({ file, lines });

    // Single-pass bucketing. The final bound is explicit so that a
    // non-comparable value lands in no bucket rather than in "huge".
    if (lines < 50) distribution.tiny++;
    else if (lines < 200) distribution.small++;
    else if (lines < 500) distribution.medium++;
    else if (lines < 1000) distribution.large++;
    else if (lines >= 1000) distribution.huge++;
  }

  sizes.sort((a, b) => b.lines - a.lines);

  return {
    largest: sizes.slice(0, 10),
    distribution
  };
}
|
package/analyzer.js
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
import Parser from 'tree-sitter';
|
|
2
|
+
import { createHash } from 'crypto';
|
|
3
|
+
|
|
4
|
+
/**
 * Walk a parsed tree and collect code entities and usage patterns for one file.
 *
 * @param {object} tree - Parsed tree; only `tree.rootNode` is read. Every node is
 *   expected to expose `type`, `text` and `children` (and optionally `parent`).
 * @param {string} sourceCode - Raw file text, used for the line count.
 * @param {string} lang - Language identifier. Currently not read by this function.
 * @returns {object} Collected entities: `functions` (Map of "name(paramCount)"
 *   -> {count, hash, params}), `classes`, `imports`, `exports`, `patterns`
 *   (call counts), async/error/event/http/storage pattern counters,
 *   `constants`, `globalState`, `envVars`, `urls`, `filePaths` and `basicStats`.
 */
export function extractEntities(tree, sourceCode, lang) {
  const entities = {
    functions: new Map(),
    classes: new Map(),
    imports: new Set(),
    exports: new Set(),
    patterns: new Map(),
    asyncPatterns: { async: 0, await: 0, promise: 0, callback: 0, thenCatch: 0 },
    errorPatterns: { tryCatch: 0, throw: 0, errorTypes: new Set() },
    internalCalls: new Map(),
    constants: [],
    globalState: [],
    envVars: new Set(),
    urls: new Set(),
    filePaths: new Set(),
    eventPatterns: { emitters: 0, listeners: 0 },
    httpPatterns: { routes: [], fetches: 0, axios: 0 },
    storagePatterns: { sql: 0, fileOps: 0, json: 0 },
    basicStats: { functions: 0, classes: 0, imports: 0, exports: 0, complexity: 0, lines: 0 }
  };

  entities.basicStats.lines = sourceCode.split('\n').length;

  // Short content hash (first 8 hex chars of MD5); an identity key, not a security measure.
  function hash(text) {
    return createHash('md5').update(text).digest('hex').slice(0, 8);
  }

  // Name of a node: its own text if it is an identifier, otherwise the text of
  // its first identifier-like direct child, otherwise null.
  function extractName(node) {
    if (node.type === 'identifier' || node.type === 'property_identifier' || node.type === 'type_identifier') {
      return node.text;
    }
    for (const child of node.children) {
      if (child.type.includes('identifier')) return child.text;
    }
    return null;
  }

  // Text between the first "(" and its matching ")" (may span lines);
  // '' when there is no balanced pair.
  function extractParamList(source) {
    const open = source.indexOf('(');
    if (open === -1) return '';
    let depth = 0;
    for (let i = open; i < source.length; i++) {
      const ch = source[i];
      if (ch === '(') {
        depth++;
      } else if (ch === ')' && --depth === 0) {
        return source.slice(open + 1, i);
      }
    }
    return '';
  }

  // Number of comma-separated entries at nesting depth 0. An empty list is 0,
  // a trailing comma adds nothing, and commas inside destructuring patterns or
  // nested calls are not treated as separators.
  // NOTE(review): commas inside generic type arguments (<A, B>) are still
  // treated as separators.
  function countParams(paramList) {
    let depth = 0;
    let count = 0;
    let hasContent = false;
    for (const ch of paramList) {
      if (ch === '(' || ch === '[' || ch === '{') depth++;
      else if (ch === ')' || ch === ']' || ch === '}') depth--;

      if (ch === ',' && depth === 0) {
        if (hasContent) count++;
        hasContent = false;
      } else if (ch.trim() !== '') {
        hasContent = true;
      }
    }
    return hasContent ? count + 1 : count;
  }

  function traverse(node) {
    const type = node.type;
    const text = node.text;

    // Functions
    if (type.includes('function') && type.includes('declaration') ||
        type === 'method_definition' || type === 'function_item') {
      const name = extractName(node) || 'anon';
      const params = extractParamList(text);
      const sig = `${name}(${countParams(params)})`;
      const h = hash(text);
      const existing = entities.functions.get(sig) || { count: 0, hash: h, params };
      existing.count++;
      entities.functions.set(sig, existing);
      entities.basicStats.functions++;
    }

    // Classes/structs/types
    if (type.includes('class') && type.includes('declaration') ||
        type === 'struct_item' || type === 'enum_item' || type === 'interface_declaration') {
      const name = extractName(node) || 'anon';
      const existing = entities.classes.get(name) || { count: 0, type: type.split('_')[0] };
      existing.count++;
      entities.classes.set(name, existing);
      entities.basicStats.classes++;
    }

    // Imports
    // NOTE(review): matches every node type containing "import", so nested
    // clause/specifier nodes are counted as well — confirm this is intended.
    if (type.includes('import')) {
      const imp = text.replace(/\s+/g, ' ').slice(0, 60);
      entities.imports.add(imp);
      entities.basicStats.imports++;
    }

    // Exports
    if (type.includes('export')) {
      const exp = extractName(node) || text.slice(0, 30);
      entities.exports.add(exp);
      entities.basicStats.exports++;
    }

    // Complexity
    if (type === 'if_statement' || type === 'while_statement' || type === 'for_statement' ||
        type === 'case_statement' || type === 'catch_clause') {
      entities.basicStats.complexity++;
    }

    // Patterns (API calls, common patterns)
    if (type === 'call_expression' || type === 'call' || type === 'function_call') {
      const name = node.children[0]?.text || '';
      if (name && name.length < 30) {
        const existing = entities.patterns.get(name) || 0;
        entities.patterns.set(name, existing + 1);

        // Undotted lower-case callee names are tallied separately as internal calls.
        if (!name.includes('.') && name.match(/^[a-z]/)) {
          entities.internalCalls.set(name, (entities.internalCalls.get(name) || 0) + 1);
        }

        if (name.endsWith('.then') || name.endsWith('.catch')) {
          entities.asyncPatterns.thenCatch++;
        }
        if (name === 'Promise' || name.startsWith('Promise.')) {
          entities.asyncPatterns.promise++;
        }
      }
    }

    // Async patterns
    // NOTE(review): the text prefix check applies to every node, so an
    // ancestor whose text starts with "async " is counted too.
    if (type === 'async_function' || type === 'async_arrow_function' || type === 'async_function_declaration' || text.startsWith('async ')) {
      entities.asyncPatterns.async++;
    }
    if (type === 'await_expression') {
      entities.asyncPatterns.await++;
    }

    // Callback patterns (function params that look like callbacks)
    if ((type === 'arrow_function' || type === 'function_expression') && node.parent?.type === 'arguments') {
      entities.asyncPatterns.callback++;
    }

    // Error handling patterns
    if (type === 'try_statement') {
      entities.errorPatterns.tryCatch++;
    }
    if (type === 'throw_statement') {
      entities.errorPatterns.throw++;
      const errType = text.match(/throw new (\w+)/)?.[1];
      if (errType) entities.errorPatterns.errorTypes.add(errType);
    }

    // Constants (module-level const declarations with UPPER_CASE names or literal values)
    if (type === 'lexical_declaration' && text.startsWith('const ')) {
      const isTopLevel = node.parent?.type === 'program' || node.parent?.type === 'export_statement';
      if (isTopLevel) {
        const nameMatch = text.match(/const\s+(\w+)\s*=/);
        if (nameMatch) {
          const name = nameMatch[1];
          const isConstStyle = name === name.toUpperCase() && name.length > 2;
          const hasLiteralValue = /=\s*(\d+|['"`][^'"`]*['"`]|true|false|null)/.test(text);
          if (isConstStyle || hasLiteralValue) {
            const valueMatch = text.match(/=\s*([^;]+)/);
            const value = valueMatch ? valueMatch[1].trim().slice(0, 30) : '';
            entities.constants.push({ name, value });
          }
        }
      }
    }

    // Global state (module-level let/var declarations)
    if ((type === 'lexical_declaration' && text.startsWith('let ')) || type === 'variable_declaration') {
      const isTopLevel = node.parent?.type === 'program' || node.parent?.type === 'export_statement';
      if (isTopLevel && !text.startsWith('const ')) {
        const nameMatch = text.match(/(let|var)\s+(\w+)/);
        if (nameMatch) {
          entities.globalState.push(nameMatch[2]);
        }
      }
    }

    // Environment variables
    const envMatch = text.match(/process\.env\.(\w+)/g);
    if (envMatch) {
      envMatch.forEach(e => entities.envVars.add(e.replace('process.env.', '')));
    }

    // URLs and paths in strings
    if (type === 'string' || type === 'template_string') {
      const urlMatch = text.match(/https?:\/\/[^\s'"`,)]+/g);
      if (urlMatch) urlMatch.forEach(u => entities.urls.add(u.slice(0, 60)));

      const pathMatch = text.match(/['"](\.?\.?\/[\w\-./]+)['"]/);
      if (pathMatch && pathMatch[1].includes('/')) {
        entities.filePaths.add(pathMatch[1]);
      }
    }

    // Event patterns
    if (type === 'call_expression') {
      const callText = node.children[0]?.text || '';
      if (callText.match(/\.(on|once|addEventListener|addListener)\s*$/)) {
        entities.eventPatterns.listeners++;
      }
      if (callText.match(/\.(emit|dispatch|dispatchEvent|trigger)\s*$/)) {
        entities.eventPatterns.emitters++;
      }

      // HTTP patterns
      if (callText === 'fetch' || callText.endsWith('.fetch')) {
        entities.httpPatterns.fetches++;
      }
      if (callText.match(/axios\.(get|post|put|delete|patch)/)) {
        entities.httpPatterns.axios++;
      }
      // Accept both quote styles, matching what the route regex below can extract.
      if (callText.match(/\.(get|post|put|delete|patch|use)\s*$/) && /['"]\//.test(text)) {
        const routeMatch = text.match(/['"](\/.+?)['"]/);
        if (routeMatch) entities.httpPatterns.routes.push(routeMatch[1]);
      }

      // Storage patterns
      if (callText.match(/\.(query|execute|prepare|run)\s*$/) || text.includes('SELECT') || text.includes('INSERT')) {
        entities.storagePatterns.sql++;
      }
      if (callText.match(/(readFile|writeFile|readdir|mkdir|unlink|stat)/)) {
        entities.storagePatterns.fileOps++;
      }
      if (callText.match(/JSON\.(parse|stringify)/)) {
        entities.storagePatterns.json++;
      }
    }

    for (const child of node.children) {
      traverse(child);
    }
  }

  traverse(tree.rootNode);
  return entities;
}
|
|
219
|
+
|
|
220
|
+
/**
 * Compute size and control-flow counters for one parsed file.
 *
 * @param {object} tree - Parsed tree; only `tree.rootNode` is read. Every node is
 *   expected to expose `type` and `children`.
 * @param {string} sourceCode - Raw file text, used for the line-based figures.
 * @returns {{nodes: number, maxDepth: number, branches: number, loops: number, returns: number, loc: number, sloc: number, density: number}}
 *   `nodes` is the total node count, `maxDepth` the deepest nesting level
 *   (root = 0), `loc` the number of lines, `sloc` the lines left after removing
 *   blank and comment-looking lines, and `density` is nodes per line.
 */
export function calculateMetrics(tree, sourceCode) {
  let maxDepth = 0, nodes = 0;
  let branches = 0, loops = 0, returns = 0;

  function traverse(node, level = 0) {
    nodes++;
    maxDepth = Math.max(maxDepth, level);

    const type = node.type;
    if (['if_statement', 'switch_statement', 'case_statement', 'conditional_expression', 'match_expression'].includes(type)) branches++;
    if (['while_statement', 'for_statement', 'loop_expression', 'for_in_statement'].includes(type)) loops++;
    // A return statement node contains a leaf keyword token whose type is also
    // "return"; counting only nodes with children avoids counting each
    // return twice.
    if (type.includes('return') && node.children.length > 0) returns++;

    for (const child of node.children) {
      traverse(child, level + 1);
    }
  }

  traverse(tree.rootNode);

  const lines = sourceCode.split('\n');
  const blankLines = lines.filter(l => l.trim() === '').length;
  // Line-prefix heuristic for comments.
  const commentLines = lines.filter(l => {
    const t = l.trim();
    return t.startsWith('//') || t.startsWith('#') || t.startsWith('/*') || t.startsWith('*');
  }).length;

  return {
    nodes,
    maxDepth,
    branches,
    loops,
    returns,
    loc: lines.length,
    sloc: lines.length - blankLines - commentLines,
    density: nodes / lines.length
  };
}
|