@wbern/obscene 0.1.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +68 -8
- package/dist/cli.js +119 -12
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -86,20 +86,69 @@ Per-file complexity without churn. Useful for raw complexity distribution.
|
|
|
86
86
|
| `--format <type>` | `json` | `json` or `table` |
|
|
87
87
|
| `--exclude <patterns...>` | — | Additional exclusion patterns |
|
|
88
88
|
|
|
89
|
+
## Metrics
|
|
90
|
+
|
|
91
|
+
Each hotspot row includes the following metrics:
|
|
92
|
+
|
|
93
|
+
### Hotspot score (`Score`)
|
|
94
|
+
|
|
95
|
+
`complexity × churn`. The core ranking metric — files that are both complex and frequently modified bubble to the top. See [Why churn × complexity?](#why-churn-x-complexity) for the research backing this approach.
|
|
96
|
+
|
|
97
|
+
### Churn (`Churn`)
|
|
98
|
+
|
|
99
|
+
Number of commits touching the file within the configured time window (default: 3 months). Measures how actively the file is being modified.
|
|
100
|
+
|
|
101
|
+
### Cyclomatic complexity (`Cmplx`)
|
|
102
|
+
|
|
103
|
+
Total cyclomatic complexity as reported by [scc](https://github.com/boyter/scc). Counts independent execution paths (branches, loops, conditions). Higher values mean more paths to test and more places for bugs to hide.
|
|
104
|
+
|
|
105
|
+
### Complexity density (`Dens`)
|
|
106
|
+
|
|
107
|
+
`complexity / lines of code`. Normalizes complexity by file size so a 50-line file with complexity 25 (density 0.50) stands out against a 500-line file with complexity 25 (density 0.05). Based on Harrison & Magel (1981), who found that complexity relative to code size is a stronger fault predictor than raw complexity alone.
|
|
108
|
+
|
|
109
|
+
### Defects (`Dfcts`)
|
|
110
|
+
|
|
111
|
+
Count of `fix:` conventional commits touching the file within the churn window. A proxy for historical defect rate — files that attract repeated fixes are more likely to contain latent bugs. Inspired by Moser, Pedrycz & Succi (2008), who showed that change-history metrics outperform static code metrics for defect prediction.
|
|
112
|
+
|
|
113
|
+
### Defect density (`defectDensity`, JSON only)
|
|
114
|
+
|
|
115
|
+
`defects / lines of code`. Not shown in table output due to column width, but available in JSON. Normalizes defect count by file size.
|
|
116
|
+
|
|
117
|
+
### Nesting depth (`Nest`)
|
|
118
|
+
|
|
119
|
+
Maximum indentation level (tab stops) in the file. Deep nesting correlates with high cognitive load and defect likelihood. Harrison & Magel (1981) identified nesting depth as a significant complexity contributor.
|
|
120
|
+
|
|
121
|
+
### Unique authors (`Auth`)
|
|
122
|
+
|
|
123
|
+
Number of distinct git authors who committed to the file within the churn window. Files touched by many authors may lack clear ownership and accumulate inconsistent patterns. Kamei et al. (2013) found developer count to be a significant predictor of defect-introducing changes.
|
|
124
|
+
|
|
125
|
+
### Tier
|
|
126
|
+
|
|
127
|
+
Cumulative score distribution bucket:
|
|
128
|
+
|
|
129
|
+
| Tier | Range | Meaning |
|
|
130
|
+
|------|-------|---------|
|
|
131
|
+
| **danger** | top 50% of total score | Refactor candidates |
|
|
132
|
+
| **watch** | next 30% (50–80%) | Keep an eye on these |
|
|
133
|
+
| **stable** | bottom 20% | Low risk |
|
|
134
|
+
|
|
89
135
|
## Example output
|
|
90
136
|
|
|
91
137
|
```
|
|
92
|
-
Hotspots — 3 months churn window | Total score:
|
|
138
|
+
Hotspots — 3 months churn window | Total score: 35,452
|
|
93
139
|
Tiers: 3 danger, 13 watch, 194 stable
|
|
94
140
|
Showing: 5 of 210
|
|
95
141
|
|
|
96
|
-
File
|
|
97
|
-
|
|
98
|
-
src/utils/effect-generator.ts
|
|
99
|
-
src/services/game-engine.ts
|
|
100
|
-
src/components/board-renderer.tsx
|
|
101
|
-
src/hooks/use-game-state.ts
|
|
102
|
-
src/utils/move-validator.ts
|
|
142
|
+
File Score % Churn Cmplx Dens Dfcts Nest Auth Tier
|
|
143
|
+
────────────────────────────────────────────────────────────────────────────────────────────────────────────────
|
|
144
|
+
src/utils/effect-generator.ts 8,296 23.4 68 122 0.12 5 6 4 DANGER
|
|
145
|
+
src/services/game-engine.ts 4,284 12.1 51 84 0.09 3 4 3 DANGER
|
|
146
|
+
src/components/board-renderer.tsx 2,940 8.3 42 70 0.11 2 5 3 DANGER
|
|
147
|
+
src/hooks/use-game-state.ts 1,320 3.7 33 40 0.08 1 3 2 WATCH
|
|
148
|
+
src/utils/move-validator.ts 945 2.7 27 35 0.06 0 2 1 WATCH
|
|
149
|
+
|
|
150
|
+
Score=complexity×churn | Dens=complexity/code | Dfcts=fix commits | Nest=max indent depth | Auth=unique authors
|
|
151
|
+
Docs: https://github.com/wbern/obscene#metrics
|
|
103
152
|
```
|
|
104
153
|
|
|
105
154
|
## Supported languages
|
|
@@ -110,6 +159,17 @@ Any language [scc supports](https://github.com/boyter/scc#features) — 200+ lan
|
|
|
110
159
|
|
|
111
160
|
Test and generated files are excluded automatically: `*.test.*`, `*.spec.*`, `__tests__/`, `__mocks__/`, `*.stories.*`, `*.d.ts`, and similar patterns. scc also skips generated files by default (`--no-gen`).
|
|
112
161
|
|
|
162
|
+
## Why churn x complexity?
|
|
163
|
+
|
|
164
|
+
Files that are both complex and frequently modified are disproportionately likely to contain defects. This is backed by decades of empirical software engineering research:
|
|
165
|
+
|
|
166
|
+
- **Nagappan & Ball (2005)** studied Windows Server 2003 and found that relative code churn measures predict system defect density with 89% accuracy. — [ICSE 2005](https://doi.org/10.1109/ICSE.2005.1553571)
|
|
167
|
+
- **Moser, Pedrycz & Succi (2008)** compared change metrics against static code attributes on Eclipse and found that process metrics (churn, change frequency) outperform static code metrics for defect prediction. — [ICSE 2008](https://doi.org/10.1145/1368088.1368114)
|
|
168
|
+
- **Shin, Meneely, Williams & Osborne (2011)** combined complexity, churn, and developer activity metrics to predict vulnerabilities in Mozilla Firefox and the Linux kernel. By flagging only 10.9% of files, the model identified 70.8% of known vulnerabilities. — [IEEE TSE](https://doi.org/10.1109/TSE.2010.55)
|
|
169
|
+
- **Tornhill & Borg (2022)** analyzed 39 proprietary codebases and found that low-quality code (by their Code Health metric) contains 15x more defects and takes 124% longer to resolve. In their case studies, 4% of the codebase was responsible for 72% of all defects. — [ACM/IEEE TechDebt 2022](https://arxiv.org/abs/2203.04374)
|
|
170
|
+
|
|
171
|
+
The general approach was popularized by Adam Tornhill's *Your Code as a Crime Scene* (2015), which applies forensic analysis techniques to version control history.
|
|
172
|
+
|
|
113
173
|
## Limitations
|
|
114
174
|
|
|
115
175
|
- **Churn = commit count**, not lines changed. A one-line typo fix counts the same as a 500-line rewrite.
|
package/dist/cli.js
CHANGED
|
@@ -5,6 +5,7 @@ import { Command } from "commander";
|
|
|
5
5
|
|
|
6
6
|
// src/analyze.ts
|
|
7
7
|
import { execSync } from "child_process";
|
|
8
|
+
import { readFileSync } from "fs";
|
|
8
9
|
var DEFAULT_EXCLUDES = [
|
|
9
10
|
/\.test\./,
|
|
10
11
|
/\.spec\./,
|
|
@@ -64,31 +65,123 @@ function runScc(excludes = []) {
|
|
|
64
65
|
}
|
|
65
66
|
return files.sort((a, b) => b.complexity - a.complexity);
|
|
66
67
|
}
|
|
68
|
+
function gitFileCount(gitArgs, errorMessage) {
|
|
69
|
+
let raw;
|
|
70
|
+
try {
|
|
71
|
+
raw = execSync(gitArgs, {
|
|
72
|
+
maxBuffer: 50 * 1024 * 1024,
|
|
73
|
+
stdio: ["pipe", "pipe", "pipe"]
|
|
74
|
+
});
|
|
75
|
+
} catch {
|
|
76
|
+
throw new Error(errorMessage);
|
|
77
|
+
}
|
|
78
|
+
const counts = /* @__PURE__ */ new Map();
|
|
79
|
+
for (const line of raw.toString().split("\n")) {
|
|
80
|
+
const trimmed = normalizePath(line.trim());
|
|
81
|
+
if (!trimmed) continue;
|
|
82
|
+
counts.set(trimmed, (counts.get(trimmed) ?? 0) + 1);
|
|
83
|
+
}
|
|
84
|
+
return counts;
|
|
85
|
+
}
|
|
67
86
|
function getChurn(months) {
|
|
87
|
+
return gitFileCount(
|
|
88
|
+
`git log --since="${months} months ago" --format="" --name-only`,
|
|
89
|
+
"Not a git repository or git is not installed."
|
|
90
|
+
);
|
|
91
|
+
}
|
|
92
|
+
function getDefects(months) {
|
|
93
|
+
return gitFileCount(
|
|
94
|
+
`git log --since="${months} months ago" --grep="^fix" --format="" --name-only`,
|
|
95
|
+
"Not a git repository or git is not installed."
|
|
96
|
+
);
|
|
97
|
+
}
|
|
98
|
+
function getAuthors(months) {
|
|
68
99
|
let raw;
|
|
69
100
|
try {
|
|
70
101
|
raw = execSync(
|
|
71
|
-
`git log --since="${months} months ago" --format="" --name-only`,
|
|
102
|
+
`git log --since="${months} months ago" --format="COMMIT_SEP%n%aN" --name-only`,
|
|
72
103
|
{ maxBuffer: 50 * 1024 * 1024, stdio: ["pipe", "pipe", "pipe"] }
|
|
73
104
|
);
|
|
74
105
|
} catch {
|
|
75
106
|
throw new Error("Not a git repository or git is not installed.");
|
|
76
107
|
}
|
|
108
|
+
const authorSets = /* @__PURE__ */ new Map();
|
|
109
|
+
const blocks = raw.toString().split("COMMIT_SEP\n");
|
|
110
|
+
for (const block of blocks) {
|
|
111
|
+
if (!block.trim()) continue;
|
|
112
|
+
const lines = block.split("\n");
|
|
113
|
+
const author = lines[0].trim();
|
|
114
|
+
if (!author) continue;
|
|
115
|
+
for (let i = 1; i < lines.length; i++) {
|
|
116
|
+
const file = normalizePath(lines[i].trim());
|
|
117
|
+
if (!file) continue;
|
|
118
|
+
let set = authorSets.get(file);
|
|
119
|
+
if (!set) {
|
|
120
|
+
set = /* @__PURE__ */ new Set();
|
|
121
|
+
authorSets.set(file, set);
|
|
122
|
+
}
|
|
123
|
+
set.add(author);
|
|
124
|
+
}
|
|
125
|
+
}
|
|
77
126
|
const counts = /* @__PURE__ */ new Map();
|
|
78
|
-
for (const
|
|
79
|
-
|
|
80
|
-
if (!trimmed) continue;
|
|
81
|
-
counts.set(trimmed, (counts.get(trimmed) ?? 0) + 1);
|
|
127
|
+
for (const [file, set] of authorSets) {
|
|
128
|
+
counts.set(file, set.size);
|
|
82
129
|
}
|
|
83
130
|
return counts;
|
|
84
131
|
}
|
|
85
|
-
function
|
|
132
|
+
function getNestingDepths(filePaths) {
|
|
133
|
+
const depths = /* @__PURE__ */ new Map();
|
|
134
|
+
for (const filePath of filePaths) {
|
|
135
|
+
let content;
|
|
136
|
+
try {
|
|
137
|
+
content = readFileSync(filePath, "utf-8");
|
|
138
|
+
} catch {
|
|
139
|
+
depths.set(filePath, 0);
|
|
140
|
+
continue;
|
|
141
|
+
}
|
|
142
|
+
let minSpaces = Number.POSITIVE_INFINITY;
|
|
143
|
+
const leadings = [];
|
|
144
|
+
for (const line of content.split("\n")) {
|
|
145
|
+
if (!line.trim()) continue;
|
|
146
|
+
const match = line.match(/^(\s+)/);
|
|
147
|
+
if (!match) continue;
|
|
148
|
+
const leading = match[1];
|
|
149
|
+
leadings.push(leading);
|
|
150
|
+
const spaceCount = (leading.match(/ /g) ?? []).length;
|
|
151
|
+
if (spaceCount > 0 && !leading.includes(" ") && spaceCount < minSpaces) {
|
|
152
|
+
minSpaces = spaceCount;
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
const indentUnit = minSpaces === Number.POSITIVE_INFINITY ? 4 : minSpaces;
|
|
156
|
+
let maxDepth = 0;
|
|
157
|
+
for (const leading of leadings) {
|
|
158
|
+
let depth = 0;
|
|
159
|
+
for (const ch of leading) {
|
|
160
|
+
if (ch === " ") {
|
|
161
|
+
depth += 1;
|
|
162
|
+
} else if (ch === " ") {
|
|
163
|
+
depth += 1 / indentUnit;
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
depth = Math.floor(depth);
|
|
167
|
+
if (depth > maxDepth) maxDepth = depth;
|
|
168
|
+
}
|
|
169
|
+
depths.set(filePath, maxDepth);
|
|
170
|
+
}
|
|
171
|
+
return depths;
|
|
172
|
+
}
|
|
173
|
+
function computeHotspots(files, churn, defects = /* @__PURE__ */ new Map(), nestingDepths = /* @__PURE__ */ new Map(), authors = /* @__PURE__ */ new Map()) {
|
|
86
174
|
const scored = files.map((f) => {
|
|
87
175
|
const fileChurn = churn.get(f.file) ?? 0;
|
|
176
|
+
const fileDefects = defects.get(f.file) ?? 0;
|
|
88
177
|
return {
|
|
89
178
|
...f,
|
|
90
179
|
churn: fileChurn,
|
|
91
|
-
hotspotScore: f.complexity * fileChurn
|
|
180
|
+
hotspotScore: f.complexity * fileChurn,
|
|
181
|
+
defects: fileDefects,
|
|
182
|
+
defectDensity: f.code > 0 ? Math.round(fileDefects / f.code * 1e4) / 1e4 : 0,
|
|
183
|
+
maxNesting: nestingDepths.get(f.file) ?? 0,
|
|
184
|
+
authors: authors.get(f.file) ?? 0
|
|
92
185
|
};
|
|
93
186
|
}).filter((h) => h.hotspotScore > 0).sort((a, b) => b.hotspotScore - a.hotspotScore);
|
|
94
187
|
const totalScore = scored.reduce((sum, h) => sum + h.hotspotScore, 0);
|
|
@@ -144,15 +237,20 @@ function formatHotspotsTable(output) {
|
|
|
144
237
|
lines.push(`Showing: ${output.showing} of ${output.totalHotspots}`);
|
|
145
238
|
lines.push("");
|
|
146
239
|
lines.push(
|
|
147
|
-
padRight("File", 50) + padLeft("Score", 8) + padLeft("%", 7) + padLeft("Churn", 7) + padLeft("Cmplx", 7) + padLeft("
|
|
240
|
+
padRight("File", 50) + padLeft("Score", 8) + padLeft("%", 7) + padLeft("Churn", 7) + padLeft("Cmplx", 7) + padLeft("Dens", 7) + padLeft("Dfcts", 6) + padLeft("Nest", 6) + padLeft("Auth", 6) + padLeft("Tier", 8)
|
|
148
241
|
);
|
|
149
|
-
lines.push("\u2500".repeat(
|
|
242
|
+
lines.push("\u2500".repeat(112));
|
|
150
243
|
for (const h of hotspots) {
|
|
151
244
|
const tierLabel = h.tier === "danger" ? "DANGER" : h.tier === "watch" ? "WATCH" : "stable";
|
|
152
245
|
lines.push(
|
|
153
|
-
padRight(truncate(h.file, 48), 50) + padLeft(h.hotspotScore.toLocaleString(), 8) + padLeft(h.percentOfTotal.toFixed(1), 7) + padLeft(String(h.churn), 7) + padLeft(String(h.complexity), 7) + padLeft(h.complexityDensity.toFixed(2),
|
|
246
|
+
padRight(truncate(h.file, 48), 50) + padLeft(h.hotspotScore.toLocaleString(), 8) + padLeft(h.percentOfTotal.toFixed(1), 7) + padLeft(String(h.churn), 7) + padLeft(String(h.complexity), 7) + padLeft(h.complexityDensity.toFixed(2), 7) + padLeft(String(h.defects), 6) + padLeft(String(h.maxNesting), 6) + padLeft(String(h.authors), 6) + padLeft(tierLabel, 8)
|
|
154
247
|
);
|
|
155
248
|
}
|
|
249
|
+
lines.push("");
|
|
250
|
+
lines.push(
|
|
251
|
+
"Score=complexity\xD7churn | Dens=complexity/code | Dfcts=fix commits | Nest=max indent depth | Auth=unique authors"
|
|
252
|
+
);
|
|
253
|
+
lines.push("Docs: https://github.com/wbern/obscene#metrics");
|
|
156
254
|
return lines.join("\n");
|
|
157
255
|
}
|
|
158
256
|
function padRight(s, n) {
|
|
@@ -167,7 +265,7 @@ function truncate(s, max) {
|
|
|
167
265
|
|
|
168
266
|
// src/cli.ts
|
|
169
267
|
var program = new Command();
|
|
170
|
-
program.name("obscene").description("Identify hotspot files \u2014 complex code that changes frequently").version("0.
|
|
268
|
+
program.name("obscene").description("Identify hotspot files \u2014 complex code that changes frequently").version("0.2.1");
|
|
171
269
|
function addSharedOptions(cmd) {
|
|
172
270
|
return cmd.option("--top <n>", "limit to top N entries (0 = all)", "20").option("--format <type>", "output format: json | table", "json").option(
|
|
173
271
|
"--exclude <patterns...>",
|
|
@@ -227,7 +325,16 @@ function runHotspots(opts) {
|
|
|
227
325
|
const months = parseInt(opts.months, 10);
|
|
228
326
|
const files = runScc(opts.exclude);
|
|
229
327
|
const churn = getChurn(months);
|
|
230
|
-
const
|
|
328
|
+
const defects = getDefects(months);
|
|
329
|
+
const authors = getAuthors(months);
|
|
330
|
+
const nestingDepths = getNestingDepths(files.map((f) => f.file));
|
|
331
|
+
const hotspots = computeHotspots(
|
|
332
|
+
files,
|
|
333
|
+
churn,
|
|
334
|
+
defects,
|
|
335
|
+
nestingDepths,
|
|
336
|
+
authors
|
|
337
|
+
);
|
|
231
338
|
const limited = top > 0 ? hotspots.slice(0, top) : hotspots;
|
|
232
339
|
const tierCounts = { danger: 0, watch: 0, stable: 0 };
|
|
233
340
|
for (const h of hotspots) {
|