sourcebook 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +111 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +17 -0
- package/dist/commands/init.d.ts +8 -0
- package/dist/commands/init.js +91 -0
- package/dist/generators/claude.d.ts +11 -0
- package/dist/generators/claude.js +191 -0
- package/dist/generators/copilot.d.ts +12 -0
- package/dist/generators/copilot.js +119 -0
- package/dist/generators/cursor.d.ts +17 -0
- package/dist/generators/cursor.js +123 -0
- package/dist/scanner/build.d.ts +2 -0
- package/dist/scanner/build.js +56 -0
- package/dist/scanner/frameworks.d.ts +2 -0
- package/dist/scanner/frameworks.js +230 -0
- package/dist/scanner/git.d.ts +17 -0
- package/dist/scanner/git.js +317 -0
- package/dist/scanner/graph.d.ts +17 -0
- package/dist/scanner/graph.js +251 -0
- package/dist/scanner/index.d.ts +2 -0
- package/dist/scanner/index.js +87 -0
- package/dist/scanner/patterns.d.ts +6 -0
- package/dist/scanner/patterns.js +203 -0
- package/dist/scanner/structure.d.ts +2 -0
- package/dist/scanner/structure.js +148 -0
- package/dist/types.d.ts +51 -0
- package/dist/types.js +1 -0
- package/dist/utils/output.d.ts +1 -0
- package/dist/utils/output.js +10 -0
- package/package.json +53 -0
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
import { execSync } from "node:child_process";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
/**
 * Mine git history for context an agent cannot discover from the working
 * tree alone:
 * - Reverted commits (literal "don't do this" signals)
 * - Recently active areas (where churn is concentrated)
 * - Co-change coupling (invisible cross-file dependencies)
 * - Rapid re-edits (code that was hard to get right)
 * - Commit message patterns (conventions to follow)
 *
 * Returns an empty analysis when `dir` is not inside a git work tree.
 */
export async function analyzeGitHistory(dir) {
    const result = {
        findings: [],
        activeAreas: [],
        revertedPatterns: [],
        coChangeClusters: [],
    };
    // Bail out early when there is no history to mine.
    if (!isGitRepo(dir)) {
        return result;
    }
    // Run each detector in order. Some detectors also append side-channel
    // data (active areas, reverted subjects, clusters) into the arrays
    // passed to them; all returned findings end up in one flat list.
    const detected = [
        detectRevertedPatterns(dir, result.revertedPatterns),
        detectActiveAreas(dir, result.activeAreas),
        detectCoChangeCoupling(dir, result.coChangeClusters),
        detectRapidReEdits(dir),
        detectCommitPatterns(dir),
    ];
    for (const findings of detected) {
        result.findings.push(...findings);
    }
    return result;
}
|
|
32
|
+
/** True when `dir` sits inside a git work tree (the probe command exits 0). */
function isGitRepo(dir) {
    const probe = "git rev-parse --is-inside-work-tree";
    try {
        execSync(probe, { stdio: "pipe", cwd: dir });
    }
    catch {
        return false;
    }
    return true;
}
|
|
44
|
+
/**
 * Run a git subcommand in `dir` and return its stdout as a string.
 * Any failure (not a repo, bad revision, etc.) yields "" so callers
 * can treat "no output" and "error" uniformly.
 */
function git(dir, args) {
    const TEN_MB = 10 * 1024 * 1024; // plenty for the bounded logs we request
    let stdout;
    try {
        stdout = execSync(`git ${args}`, {
            cwd: dir,
            stdio: "pipe",
            maxBuffer: TEN_MB,
        });
    }
    catch {
        return "";
    }
    return stdout.toString();
}
|
|
56
|
+
/**
 * Find reverted commits -- explicit "we tried this and it didn't work"
 * signals. Matched revert subjects are also appended to `revertedPatterns`.
 */
function detectRevertedPatterns(dir, revertedPatterns) {
    const findings = [];
    const revertLog = git(dir, 'log --grep="^Revert" --oneline --since="1 year ago" -50');
    if (!revertLog.trim()) {
        return findings;
    }
    const reverts = revertLog.trim().split("\n").filter(Boolean);
    // A single revert is noise; two or more suggests a pattern worth surfacing.
    if (reverts.length < 2) {
        return findings;
    }
    // Extract the original subject line from `Revert "<subject>"` entries.
    const subjectRe = /^[a-f0-9]+ Revert "(.+)"/;
    const revertDescriptions = reverts
        .slice(0, 10)
        .map((line) => line.match(subjectRe))
        .filter((m) => m !== null)
        .map((m) => m[1]);
    revertedPatterns.push(...revertDescriptions);
    if (revertDescriptions.length > 0) {
        const shown = revertDescriptions.slice(0, 3).join("; ");
        const suffix = revertDescriptions.length > 3 ? "; ..." : "";
        findings.push({
            category: "Git history",
            description: `${reverts.length} reverted commits in the last year. Previously attempted and rolled back: ${shown}${suffix}`,
            rationale: "Reverted commits are explicit signals of approaches that were tried and failed. Agents should avoid re-attempting these patterns.",
            confidence: "high",
            discoverable: false,
        });
    }
    return findings;
}
|
|
87
|
+
/**
 * Find recently active areas -- where development is concentrated.
 * Top-level directory names of the busiest areas are appended to
 * `activeAreas`.
 */
function detectActiveAreas(dir, activeAreas) {
    const findings = [];
    // Files touched in the last 30 days (added/modified/renamed/copied).
    const recentChanges = git(dir, 'log --since="30 days ago" --name-only --pretty=format: --diff-filter=AMRC');
    if (!recentChanges.trim()) {
        return findings;
    }
    // Tally change counts per top-level directory.
    const dirCounts = new Map();
    for (const file of recentChanges.trim().split("\n").filter(Boolean)) {
        const fileDir = path.dirname(file); // renamed so it no longer shadows the `dir` parameter
        const topDir = fileDir.split(path.sep)[0] || fileDir;
        if (topDir === "." || topDir === "node_modules") {
            continue;
        }
        dirCounts.set(topDir, (dirCounts.get(topDir) || 0) + 1);
    }
    // Keep the five busiest directories, most active first.
    const sorted = [...dirCounts.entries()].sort((a, b) => b[1] - a[1]).slice(0, 5);
    if (sorted.length < 2) {
        return findings;
    }
    const topAreas = sorted
        .filter(([, count]) => count >= 3)
        .map(([d, count]) => `${d}/ (${count} changes)`);
    if (topAreas.length > 0) {
        activeAreas.push(...sorted.map(([d]) => d));
        findings.push({
            category: "Active development",
            description: `Most active areas in the last 30 days: ${topAreas.join(", ")}. Expect ongoing changes here.`,
            rationale: "Active areas may have in-progress refactoring. Check recent commits before making large changes.",
            confidence: "medium",
            discoverable: false,
        });
    }
    return findings;
}
|
|
125
|
+
/**
 * Detect co-change coupling -- files that are always committed together
 * but have no import relationship. These are invisible dependencies.
 *
 * Strong cross-directory pairs are appended to `clusters` as
 * [fileA, fileB, coChangeCount] tuples.
 */
function detectCoChangeCoupling(dir, clusters) {
    const findings = [];
    // Get the last 200 commits with their changed files
    const log = git(dir, 'log --name-only --pretty=format:"COMMIT" --since="6 months ago" -200');
    if (!log.trim())
        return findings;
    // Parse commits into file groups
    const commits = [];
    let current = [];
    for (const line of log.split("\n")) {
        // The COMMIT separator may arrive quoted or unquoted depending on
        // how the shell treated the --pretty argument, so accept both.
        if (line.trim() === '"COMMIT"' || line.trim() === "COMMIT") {
            if (current.length > 0)
                commits.push(current);
            current = [];
        }
        else if (line.trim() && !line.includes("node_modules")) {
            current.push(line.trim());
        }
    }
    // Flush the trailing commit (no separator follows the last one).
    if (current.length > 0)
        commits.push(current);
    // Build co-occurrence matrix (only for source files)
    const sourceExts = new Set([
        ".ts",
        ".tsx",
        ".js",
        ".jsx",
        ".py",
        ".go",
        ".rs",
    ]);
    const pairCounts = new Map(); // "a|||b" -> number of commits touching both
    const fileCounts = new Map(); // file -> number of qualifying commits touching it
    for (const commit of commits) {
        const sourceFiles = commit.filter((f) => sourceExts.has(path.extname(f).toLowerCase()));
        // Skip mega-commits (likely merges or bulk changes)
        if (sourceFiles.length > 20 || sourceFiles.length < 2)
            continue;
        for (const file of sourceFiles) {
            fileCounts.set(file, (fileCounts.get(file) || 0) + 1);
        }
        // Count pairs; sorting before joining gives an order-independent key.
        for (let i = 0; i < sourceFiles.length; i++) {
            for (let j = i + 1; j < sourceFiles.length; j++) {
                const pair = [sourceFiles[i], sourceFiles[j]].sort().join("|||");
                pairCounts.set(pair, (pairCounts.get(pair) || 0) + 1);
            }
        }
    }
    // Find statistically significant co-changes
    // (files committed together more than expected by chance)
    const significantPairs = [];
    for (const [pairKey, count] of pairCounts) {
        if (count < 4)
            continue; // Need at least 4 co-occurrences
        const [fileA, fileB] = pairKey.split("|||");
        const countA = fileCounts.get(fileA) || 0;
        const countB = fileCounts.get(fileB) || 0;
        // Skip files in the same directory (obvious coupling)
        if (path.dirname(fileA) === path.dirname(fileB))
            continue;
        // Jaccard-like strength: co-changes / union of changes
        const strength = count / (countA + countB - count);
        if (strength > 0.3) {
            significantPairs.push({
                files: [fileA, fileB],
                count,
                strength,
            });
        }
    }
    // Sort by strength
    significantPairs.sort((a, b) => b.strength - a.strength);
    if (significantPairs.length > 0) {
        const topPairs = significantPairs.slice(0, 5);
        for (const pair of topPairs) {
            clusters.push([pair.files[0], pair.files[1], pair.count]);
        }
        // Only the top 3 pairs are named in the human-readable description.
        const pairDescriptions = topPairs
            .slice(0, 3)
            .map((p) => `${path.basename(p.files[0])} ↔ ${path.basename(p.files[1])} (${p.count} co-commits)`);
        findings.push({
            category: "Hidden dependencies",
            description: `Files that change together across directories (invisible coupling): ${pairDescriptions.join("; ")}`,
            rationale: "These files have no import relationship but are always modified together. Changing one without the other likely introduces bugs.",
            confidence: "high",
            discoverable: false,
        });
    }
    return findings;
}
|
|
220
|
+
/**
 * Detect files that were edited many times in quick succession --
 * code that was hard to get right.
 */
function detectRapidReEdits(dir) {
    const findings = [];
    // Get files with high commit frequency in short windows
    const log = git(dir, 'log --format="%H %aI" --name-only --since="3 months ago" -300');
    if (!log.trim())
        return findings;
    // Track edits per file with timestamps
    const fileEdits = new Map(); // file -> Date[], one entry per commit that touched it
    let currentDate = null;
    for (const line of log.split("\n")) {
        // Commit header lines look like "<40-hex sha> <ISO author date>".
        // Only the calendar date is captured, so timing is day-granular.
        const commitMatch = line.match(/^[a-f0-9]{40} (\d{4}-\d{2}-\d{2})/);
        if (commitMatch) {
            currentDate = new Date(commitMatch[1]);
        }
        else if (line.trim() && currentDate && !line.includes("node_modules")) {
            // Any other non-empty line is a file path belonging to the most
            // recently seen commit header.
            const file = line.trim();
            if (!fileEdits.has(file))
                fileEdits.set(file, []);
            fileEdits.get(file).push(currentDate);
        }
    }
    // Find files edited 5+ times within a 7-day window
    const churnyFiles = [];
    for (const [file, dates] of fileEdits) {
        if (dates.length < 5)
            continue;
        // Sort dates (chronological; mutates the stored array, which is fine
        // since nothing else reads it afterwards)
        dates.sort((a, b) => a.getTime() - b.getTime());
        // Sliding window: find any 7-day window with 5+ edits
        for (let i = 0; i <= dates.length - 5; i++) {
            const windowStart = dates[i];
            const windowEnd = new Date(windowStart.getTime() + 7 * 24 * 60 * 60 * 1000);
            const editsInWindow = dates.filter((d) => d >= windowStart && d <= windowEnd).length;
            if (editsInWindow >= 5) {
                churnyFiles.push({
                    file,
                    edits: editsInWindow,
                    window: `${windowStart.toISOString().split("T")[0]}`, // window start, YYYY-MM-DD
                });
                break; // One detection per file is enough
            }
        }
    }
    // Sort by edit count
    churnyFiles.sort((a, b) => b.edits - a.edits);
    if (churnyFiles.length > 0) {
        const topFiles = churnyFiles
            .slice(0, 3)
            .map((f) => `${f.file} (${f.edits} edits in one week)`);
        findings.push({
            category: "Fragile code",
            description: `Files that required many rapid edits (hard to get right): ${topFiles.join("; ")}`,
            rationale: "High churn in short windows indicates tricky logic. Take extra care when modifying these files.",
            confidence: "medium",
            discoverable: false,
        });
    }
    return findings;
}
|
|
283
|
+
/**
 * Detect commit message patterns -- reveals development focus and conventions.
 *
 * Emits a finding when the majority (>50%) of recent commits follow the
 * Conventional Commits format, listing the most frequently used scopes.
 */
function detectCommitPatterns(dir) {
    const findings = [];
    const log = git(dir, 'log --oneline --since="6 months ago" -200');
    if (!log.trim())
        return findings;
    const messages = log.trim().split("\n").filter(Boolean);
    // Detect conventional commits usage: "<hash> type(scope)?!?: subject"
    const conventionalPattern = /^[a-f0-9]+ (feat|fix|docs|refactor|test|chore|style|perf|ci|build)(\(.+?\))?[!:]?\s*:/;
    const conventionalCount = messages.filter((m) => conventionalPattern.test(m)).length;
    if (conventionalCount > messages.length * 0.5) {
        // Extract scope patterns (capture group 2 is "(scope)" when present)
        const scopes = new Map();
        for (const msg of messages) {
            const match = msg.match(conventionalPattern);
            if (match?.[2]) {
                const scope = match[2].replace(/[()]/g, "");
                scopes.set(scope, (scopes.get(scope) || 0) + 1);
            }
        }
        const topScopes = [...scopes.entries()]
            .sort((a, b) => b[1] - a[1])
            .slice(0, 5)
            .map(([scope]) => scope);
        // Build the scope sentence separately: the previous inline ternary
        // produced a malformed "…(feat/fix/docs/etc). . Follow…" when no
        // scopes were found.
        const scopeNote = topScopes.length > 0 ? ` Common scopes: ${topScopes.join(", ")}.` : "";
        findings.push({
            category: "Commit conventions",
            description: `Uses Conventional Commits (feat/fix/docs/etc).${scopeNote} Follow this pattern for new commits.`,
            // rationale added for consistency: every other finding in this
            // module carries one.
            rationale: "Commit style is a team convention that cannot be inferred from the working tree. Matching it keeps history consistent and changelog tooling working.",
            confidence: "high",
            discoverable: false,
        });
    }
    return findings;
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { Finding } from "../types.js";
|
|
2
|
+
/** Result of analyzing a project's internal import graph. */
interface GraphAnalysis {
    /** Files ranked by importance (PageRank), highest score first. */
    rankedFiles: {
        file: string;
        score: number;
    }[];
    /** Findings about architecture from the graph */
    findings: Finding[];
}
/**
 * Build an import/dependency graph and run PageRank to identify
 * the most structurally important files. Conventions found in
 * high-PageRank files are likely canonical.
 *
 * @param dir - Project root directory; files are read relative to it.
 * @param files - Project-relative file paths to consider.
 * @returns Ranked files plus architecture findings derived from the graph.
 */
export declare function analyzeImportGraph(dir: string, files: string[]): Promise<GraphAnalysis>;
export {};
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
/**
 * Build an import/dependency graph and run PageRank to identify
 * the most structurally important files. Conventions found in
 * high-PageRank files are likely canonical.
 *
 * Also derives three kinds of findings from the same edge list: hub files
 * (high fan-in), short circular import chains, and orphan files with no
 * graph connections at all.
 */
export async function analyzeImportGraph(dir, files) {
    const findings = [];
    // Only analyze source files
    const sourceFiles = files.filter((f) => /\.(ts|tsx|js|jsx)$/.test(f));
    // Too few files for graph statistics to be meaningful.
    if (sourceFiles.length < 5) {
        return { rankedFiles: [], findings };
    }
    // Extract imports from each file
    const edges = []; // { from, to } means "from" imports "to"
    const fileSet = new Set(sourceFiles);
    for (const file of sourceFiles) {
        const filePath = path.join(dir, file);
        let content;
        try {
            content = fs.readFileSync(filePath, "utf-8");
        }
        catch {
            // Unreadable file (deleted, permissions, ...) -- just skip it.
            continue;
        }
        const imports = extractImports(content);
        for (const imp of imports) {
            const resolved = resolveImport(imp, file, fileSet, dir);
            if (resolved) {
                edges.push({ from: file, to: resolved });
            }
        }
    }
    if (edges.length < 5) {
        return { rankedFiles: [], findings };
    }
    // Run PageRank (20 iterations, standard 0.85 damping factor)
    const scores = pageRank(sourceFiles, edges, 20, 0.85);
    // Sort by score descending
    const rankedFiles = [...scores.entries()]
        .sort((a, b) => b[1] - a[1])
        .map(([file, score]) => ({ file, score }));
    // Find hub files (high fan-in -- many files import them)
    const fanIn = new Map();
    for (const edge of edges) {
        fanIn.set(edge.to, (fanIn.get(edge.to) || 0) + 1);
    }
    const hubs = [...fanIn.entries()]
        .filter(([, count]) => count >= 5)
        .sort((a, b) => b[1] - a[1]);
    if (hubs.length > 0) {
        const hubList = hubs
            .slice(0, 5)
            .map(([file, count]) => `${file} (imported by ${count} files)`);
        findings.push({
            category: "Core modules",
            description: `Hub files (most depended on): ${hubList.join("; ")}. Changes here have the widest blast radius.`,
            rationale: "These are the most imported files in the project. Modifying them affects many consumers. Test thoroughly after changes.",
            confidence: "high",
            discoverable: false,
        });
    }
    // Detect potential circular dependencies
    const cycles = detectCycles(edges, sourceFiles);
    if (cycles.length > 0) {
        const cycleDescriptions = cycles
            .slice(0, 3)
            .map((c) => c.map((f) => path.basename(f)).join(" → "));
        findings.push({
            category: "Circular dependencies",
            description: `Circular import chains detected: ${cycleDescriptions.join("; ")}. Avoid adding to these cycles.`,
            rationale: "Circular dependencies cause subtle bugs (undefined imports, initialization order issues). Agents may unknowingly create new cycles.",
            confidence: "high",
            discoverable: false,
        });
    }
    // Detect orphan files (no imports, not imported)
    const connectedFiles = new Set();
    for (const edge of edges) {
        connectedFiles.add(edge.from);
        connectedFiles.add(edge.to);
    }
    // Exclude files that are legitimately standalone (tests, configs, type decls).
    const orphans = sourceFiles.filter((f) => !connectedFiles.has(f) &&
        !f.includes("test") &&
        !f.includes("spec") &&
        !f.includes(".config") &&
        !f.endsWith(".d.ts"));
    // Only report when orphans are a clear minority; a huge orphan ratio may
    // instead mean import extraction simply failed for this codebase.
    if (orphans.length >= 3 && orphans.length < sourceFiles.length * 0.3) {
        findings.push({
            category: "Dead code candidates",
            description: `${orphans.length} source files have no import connections (potential dead code): ${orphans.slice(0, 5).join(", ")}${orphans.length > 5 ? ", ..." : ""}`,
            confidence: "low",
            discoverable: false,
        });
    }
    return { rankedFiles, findings };
}
|
|
99
|
+
/**
 * Extract import specifiers from a source file with regexes.
 * Not as robust as a real parser, but fast and sufficient for building a
 * project-internal dependency graph.
 *
 * Only project-relative specifiers ("./x", "../x", "@/x", "~/x") are
 * returned; bare package names are dropped.
 */
function extractImports(content) {
    // Order matters: static imports first, then dynamic import(), then require().
    const patterns = [
        /(?:import|export)\s+.*?\s+from\s+['"]([^'"]+)['"]/g, // static import / re-export
        /import\s*\(\s*['"]([^'"]+)['"]\s*\)/g,               // dynamic import()
        /require\s*\(\s*['"]([^'"]+)['"]\s*\)/g,              // CommonJS require()
    ];
    const found = patterns.flatMap((re) => [...content.matchAll(re)].map((m) => m[1]));
    const isInternal = (p) => p.startsWith(".") || p.startsWith("@/") || p.startsWith("~/");
    return found.filter(isInternal);
}
|
|
123
|
+
/**
 * Resolve an import specifier to an actual project file.
 *
 * @param importPath - Raw specifier ("./util", "../x", "@/lib/db", ...).
 * @param fromFile - Project-relative path of the importing file.
 * @param fileSet - Set of known project-relative paths (forward-slash separated).
 * @param dir - Project root; currently unused, kept for interface stability.
 * @returns The matching project-relative path, or null when no known file matches.
 */
function resolveImport(importPath, fromFile, fileSet, dir) {
    let resolved;
    if (importPath.startsWith("@/") || importPath.startsWith("~/")) {
        // Path alias -- assume the common "src/" root mapping.
        // NOTE(review): real alias targets live in tsconfig "paths"; this is a heuristic.
        resolved = importPath.replace(/^[@~]\//, "src/");
    }
    else {
        // Relative import, resolved against the importing file's directory.
        resolved = path.normalize(path.join(path.dirname(fromFile), importPath));
    }
    // path.join/normalize emit backslashes on Windows, but fileSet entries use
    // forward slashes -- unify the separators so lookups work on every platform.
    resolved = resolved.split(path.sep).join("/");
    // Try exact match, then with extensions, then as directory index
    const candidates = [
        resolved,
        `${resolved}.ts`,
        `${resolved}.tsx`,
        `${resolved}.js`,
        `${resolved}.jsx`,
        `${resolved}/index.ts`,
        `${resolved}/index.tsx`,
        `${resolved}/index.js`,
        `${resolved}/index.jsx`,
    ];
    for (const candidate of candidates) {
        // Drop a leading "./" so candidates compare equal to fileSet entries.
        const normalized = candidate.replace(/^\.\//, "");
        if (fileSet.has(normalized))
            return normalized;
    }
    return null;
}
|
|
156
|
+
/**
 * Plain PageRank over the import graph (no external dependencies).
 *
 * Every node starts at 1/n. On each iteration a node receives the damped
 * sum of its predecessors' scores divided by their out-degrees, plus the
 * uniform teleport term (1 - damping) / n.
 */
function pageRank(nodes, edges, iterations, damping) {
    const n = nodes.length;
    const scores = new Map(nodes.map((node) => [node, 1 / n]));
    // Out-degree per node (how many edges leave it).
    const outDegree = new Map(nodes.map((node) => [node, 0]));
    // Reverse adjacency: for each node, the nodes that link TO it.
    const incoming = new Map(nodes.map((node) => [node, []]));
    for (const { from, to } of edges) {
        outDegree.set(from, (outDegree.get(from) || 0) + 1);
        if (incoming.has(to)) {
            incoming.get(to).push(from);
        }
    }
    for (let step = 0; step < iterations; step++) {
        // Compute the full next generation before committing it, so all
        // reads within one iteration see the previous generation's scores.
        const next = new Map();
        for (const node of nodes) {
            let inbound = 0;
            for (const source of incoming.get(node) || []) {
                inbound += (scores.get(source) || 0) / (outDegree.get(source) || 1);
            }
            next.set(node, (1 - damping) / n + damping * inbound);
        }
        for (const [node, score] of next) {
            scores.set(node, score);
        }
    }
    return scores;
}
|
|
201
|
+
/**
 * Detect circular dependencies using DFS.
 * Returns up to 5 short cycles, each as a node path ending where it began.
 */
function detectCycles(edges, files) {
    // Forward adjacency list, restricted to known files.
    const adj = new Map();
    for (const file of files)
        adj.set(file, []);
    for (const edge of edges) {
        if (adj.has(edge.from)) {
            adj.get(edge.from).push(edge.to);
        }
    }
    const cycles = [];
    const visited = new Set(); // fully explored nodes, never re-entered
    const stack = new Set();   // nodes on the current DFS path (fast membership test)
    // NOTE: this local `path` array shadows the imported `path` module inside
    // dfs(); the module is not used in this function, so it is harmless.
    const path = [];
    function dfs(node) {
        // Stop expanding once we have enough examples to report.
        if (cycles.length >= 5)
            return;
        if (stack.has(node)) {
            // Found a cycle: the segment of the current path from the first
            // occurrence of `node` back around to `node` itself.
            const cycleStart = path.indexOf(node);
            if (cycleStart !== -1) {
                const cycle = [...path.slice(cycleStart), node];
                if (cycle.length <= 5) {
                    // Only report short cycles
                    cycles.push(cycle);
                }
            }
            return;
        }
        if (visited.has(node))
            return;
        visited.add(node);
        stack.add(node);
        path.push(node);
        for (const neighbor of adj.get(node) || []) {
            dfs(neighbor);
        }
        // Backtrack: remove the node from the active path.
        stack.delete(node);
        path.pop();
    }
    for (const file of files) {
        if (!visited.has(file))
            dfs(file);
        if (cycles.length >= 5)
            break;
    }
    return cycles;
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import { globSync } from "glob";
|
|
3
|
+
import { detectFrameworks } from "./frameworks.js";
|
|
4
|
+
import { detectBuildCommands } from "./build.js";
|
|
5
|
+
import { detectPatterns } from "./patterns.js";
|
|
6
|
+
import { detectProjectStructure } from "./structure.js";
|
|
7
|
+
import { analyzeGitHistory } from "./git.js";
|
|
8
|
+
import { analyzeImportGraph } from "./graph.js";
|
|
9
|
+
// Glob patterns excluded from every project scan: dependency and build
// output directories, VCS internals, native platform folders, and lockfiles.
const IGNORE_PATTERNS = [
    "**/node_modules/**",
    "**/dist/**",
    "**/build/**",
    "**/.git/**",
    "**/.next/**",
    "**/coverage/**",
    "**/.expo/**",
    "**/android/**",
    "**/ios/**",
    "**/*.lock",
    "**/package-lock.json",
];
|
|
22
|
+
/**
 * Scan a project directory and assemble everything the generators need:
 * file list, languages, frameworks, commands, structure, findings, and
 * graph-ranked files.
 */
export async function scanProject(dir) {
    // Every file in the tree, minus generated/vendored noise.
    const files = globSync("**/*", {
        cwd: dir,
        nodir: true,
        ignore: IGNORE_PATTERNS,
        dot: true,
    });
    const languages = detectLanguages(files);
    const frameworks = await detectFrameworks(dir, files);
    const frameworkNames = frameworks.map((fw) => fw.name);
    const commands = await detectBuildCommands(dir);
    const structure = detectProjectStructure(dir, files);
    const patterns = await detectPatterns(dir, files, frameworkNames);
    const gitAnalysis = await analyzeGitHistory(dir);
    const graphAnalysis = await analyzeImportGraph(dir, files);
    // One flat list of findings, in a stable source order:
    // frameworks -> structure -> patterns -> git -> graph.
    const findings = frameworks.flatMap((fw) => fw.findings).concat(
        structure.findings,
        patterns,
        gitAnalysis.findings,
        graphAnalysis.findings,
    );
    return {
        dir,
        files,
        languages,
        frameworks: frameworkNames,
        commands,
        structure,
        findings,
        rankedFiles: graphAnalysis.rankedFiles,
    };
}
|
|
63
|
+
/**
 * Map file extensions to human-readable language names and return the
 * unique set of languages present, in first-seen order.
 */
function detectLanguages(files) {
    const extMap = {
        ".ts": "TypeScript",
        ".tsx": "TypeScript",
        ".js": "JavaScript",
        ".jsx": "JavaScript",
        ".py": "Python",
        ".go": "Go",
        ".rs": "Rust",
        ".rb": "Ruby",
        ".java": "Java",
        ".swift": "Swift",
        ".kt": "Kotlin",
        ".css": "CSS",
        ".scss": "SCSS",
        ".html": "HTML",
    };
    // Extensions are lowercased so "FOO.TS" still counts as TypeScript.
    const languages = files
        .map((file) => extMap[path.extname(file).toLowerCase()])
        .filter((lang) => lang !== undefined);
    // Set preserves insertion order, matching first-seen ordering.
    return [...new Set(languages)];
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { Finding } from "../types.js";
|
|
2
|
+
/**
 * Detect code patterns and conventions that are non-obvious.
 * This is the core intelligence layer -- finding things agents miss.
 *
 * @param dir - Project root directory.
 * @param files - Project-relative file paths to inspect.
 * @param frameworks - Names of frameworks already detected in the project.
 * @returns Findings describing non-obvious conventions.
 */
export declare function detectPatterns(dir: string, files: string[], frameworks: string[]): Promise<Finding[]>;
|