@brunobrise/xfeat 1.1.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/examples/nanobot-features.md +1 -1
- package/index.js +182 -16
- package/index.test.js +4 -4
- package/package.json +3 -2
package/index.js
CHANGED
|
@@ -17,21 +17,30 @@ const anthropic = new Anthropic({
|
|
|
17
17
|
// Helper: Read and parse .gitignore
|
|
18
18
|
async function getIgnores(targetDir) {
|
|
19
19
|
const ig = ignore().add([
|
|
20
|
-
"node_modules",
|
|
21
|
-
"bower_components",
|
|
22
|
-
"vendor",
|
|
23
|
-
"venv",
|
|
24
|
-
".venv",
|
|
25
|
-
"env",
|
|
26
|
-
"__pycache__",
|
|
27
|
-
".tox",
|
|
28
|
-
"target",
|
|
29
|
-
"packages",
|
|
30
|
-
".gradle",
|
|
31
|
-
".git",
|
|
32
|
-
"dist",
|
|
33
|
-
"build",
|
|
20
|
+
"node_modules/",
|
|
21
|
+
"bower_components/",
|
|
22
|
+
"vendor/",
|
|
23
|
+
"venv/",
|
|
24
|
+
".venv/",
|
|
25
|
+
"env/",
|
|
26
|
+
"__pycache__/",
|
|
27
|
+
".tox/",
|
|
28
|
+
"target/",
|
|
29
|
+
"packages/",
|
|
30
|
+
".gradle/",
|
|
31
|
+
".git/",
|
|
32
|
+
"dist/",
|
|
33
|
+
"build/",
|
|
34
|
+
"out/",
|
|
34
35
|
"*.min.js",
|
|
36
|
+
"android/",
|
|
37
|
+
"ios/",
|
|
38
|
+
".next/",
|
|
39
|
+
"nextjs/",
|
|
40
|
+
"coverage/",
|
|
41
|
+
"tmp/",
|
|
42
|
+
"temp/",
|
|
43
|
+
".expo/",
|
|
35
44
|
]);
|
|
36
45
|
try {
|
|
37
46
|
const gitignoreContent = await fs.readFile(
|
|
@@ -42,6 +51,17 @@ async function getIgnores(targetDir) {
|
|
|
42
51
|
} catch (err) {
|
|
43
52
|
// No .gitignore found, proceed with defaults
|
|
44
53
|
}
|
|
54
|
+
|
|
55
|
+
try {
|
|
56
|
+
const xfeatignoreContent = await fs.readFile(
|
|
57
|
+
path.join(targetDir, ".xfeatignore"),
|
|
58
|
+
"utf8",
|
|
59
|
+
);
|
|
60
|
+
ig.add(xfeatignoreContent);
|
|
61
|
+
} catch (err) {
|
|
62
|
+
// No .xfeatignore found, silently continue
|
|
63
|
+
}
|
|
64
|
+
|
|
45
65
|
return ig;
|
|
46
66
|
}
|
|
47
67
|
|
|
@@ -218,6 +238,111 @@ async function extractStructure(filePath) {
|
|
|
218
238
|
|
|
219
239
|
// --- LLM Stages ---
|
|
220
240
|
|
|
241
|
+
// Stage 0: AI File Pre-filtering
|
|
242
|
+
async function prefilterFilesWithClaude(filePaths, targetDir) {
|
|
243
|
+
const CHUNK_SIZE = 1000;
|
|
244
|
+
const chunks = [];
|
|
245
|
+
for (let i = 0; i < filePaths.length; i += CHUNK_SIZE) {
|
|
246
|
+
chunks.push({
|
|
247
|
+
index: Math.floor(i / CHUNK_SIZE),
|
|
248
|
+
files: filePaths.slice(i, i + CHUNK_SIZE),
|
|
249
|
+
});
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
const modelToUse =
|
|
253
|
+
process.env.CLAUDE_CODE_SUBAGENT_MODEL ||
|
|
254
|
+
process.env.ANTHROPIC_MODEL ||
|
|
255
|
+
"claude-3-7-sonnet-20250219";
|
|
256
|
+
|
|
257
|
+
const CONCURRENCY_LIMIT = parseInt(process.env.CONCURRENCY_LIMIT || "5", 10);
|
|
258
|
+
|
|
259
|
+
const chunkResults = await pMap(
|
|
260
|
+
chunks,
|
|
261
|
+
async (chunk) => {
|
|
262
|
+
const relativePaths = chunk.files.map((f) => path.relative(targetDir, f));
|
|
263
|
+
|
|
264
|
+
const prompt = `
|
|
265
|
+
You are an expert software architect. You are given a list of file paths from a codebase.
|
|
266
|
+
Your task is to filter this list by identifying and EXCLUDING any boilerplate, trivial configuration, auto-generated files, lock files, empty files, non-functional UI assets, or generic/non-core test files that would not contribute meaningfully to a high-level architectural summary of the core system.
|
|
267
|
+
|
|
268
|
+
Return ONLY a raw JSON array of strings containing the file paths that should be KEPT.
|
|
269
|
+
Do not include any explanations, markdown formatting, or backticks. Just the raw JSON array.
|
|
270
|
+
|
|
271
|
+
File paths (Chunk ${chunk.index + 1} of ${chunks.length}):
|
|
272
|
+
${JSON.stringify(relativePaths, null, 2)}
|
|
273
|
+
`;
|
|
274
|
+
|
|
275
|
+
let attempt = 0;
|
|
276
|
+
const explicitMaxRetries = 2;
|
|
277
|
+
let chunkKeptFiles = chunk.files; // Default to keeping all if it fails
|
|
278
|
+
|
|
279
|
+
while (attempt <= explicitMaxRetries) {
|
|
280
|
+
try {
|
|
281
|
+
const response = await anthropic.messages.create({
|
|
282
|
+
model: modelToUse,
|
|
283
|
+
max_tokens: 8000,
|
|
284
|
+
temperature: 0.1,
|
|
285
|
+
system:
|
|
286
|
+
"You are a technical analyst. You must return ONLY a raw JSON array of strings (file paths to retain). No markdown formatting.",
|
|
287
|
+
messages: [{ role: "user", content: prompt }],
|
|
288
|
+
});
|
|
289
|
+
|
|
290
|
+
let text =
|
|
291
|
+
response.content.find((c) => c.type === "text")?.text || "[]";
|
|
292
|
+
text = text
|
|
293
|
+
.replace(/^\s*```(json)?/i, "")
|
|
294
|
+
.replace(/```\s*$/i, "")
|
|
295
|
+
.trim();
|
|
296
|
+
|
|
297
|
+
const keptFilesRelative = JSON.parse(text);
|
|
298
|
+
if (!Array.isArray(keptFilesRelative)) {
|
|
299
|
+
throw new Error("AI did not return an array.");
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
const keptFilesAbsolute = keptFilesRelative.map((p) =>
|
|
303
|
+
path.resolve(targetDir, p),
|
|
304
|
+
);
|
|
305
|
+
chunkKeptFiles = chunk.files.filter((f) =>
|
|
306
|
+
keptFilesAbsolute.includes(path.resolve(f)),
|
|
307
|
+
);
|
|
308
|
+
break;
|
|
309
|
+
} catch (err) {
|
|
310
|
+
if (err.status === 429 && attempt < explicitMaxRetries) {
|
|
311
|
+
attempt++;
|
|
312
|
+
const delay = Math.min(
|
|
313
|
+
Math.pow(2, attempt) * 2000 + Math.random() * 1000,
|
|
314
|
+
30000,
|
|
315
|
+
);
|
|
316
|
+
console.warn(
|
|
317
|
+
`\n⚠️ [AI Pre-filtering] API Rate Limit hit (429). Retrying in ${Math.round(delay / 1000)}s... (Attempt ${attempt}/${explicitMaxRetries})`,
|
|
318
|
+
);
|
|
319
|
+
await new Promise((res) => setTimeout(res, delay));
|
|
320
|
+
} else if (
|
|
321
|
+
attempt < explicitMaxRetries &&
|
|
322
|
+
err.name === "SyntaxError"
|
|
323
|
+
) {
|
|
324
|
+
attempt++;
|
|
325
|
+
} else {
|
|
326
|
+
console.warn(
|
|
327
|
+
`\n⚠️ AI filtering failed for chunk ${chunk.index + 1}, falling back to all files in chunk.`,
|
|
328
|
+
err.message,
|
|
329
|
+
);
|
|
330
|
+
break;
|
|
331
|
+
}
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
return chunkKeptFiles;
|
|
335
|
+
},
|
|
336
|
+
CONCURRENCY_LIMIT,
|
|
337
|
+
);
|
|
338
|
+
|
|
339
|
+
let allKeptFiles = [];
|
|
340
|
+
for (const res of chunkResults) {
|
|
341
|
+
allKeptFiles = allKeptFiles.concat(res);
|
|
342
|
+
}
|
|
343
|
+
return allKeptFiles;
|
|
344
|
+
}
|
|
345
|
+
|
|
221
346
|
// Stage 1: File-Level (Micro)
|
|
222
347
|
async function extractFeaturesWithClaude(structuralData, targetDir) {
|
|
223
348
|
const fullPath = path.join(targetDir, structuralData.path);
|
|
@@ -333,6 +458,7 @@ async function extractComponentSummary(dirName, fileSummaries) {
|
|
|
333
458
|
1. What is the overarching purpose of this component?
|
|
334
459
|
2. What are the core macro-features it provides to the broader system?
|
|
335
460
|
3. Generate a relevant Mermaid.js diagram (e.g., C4 Context, Sequence, or State) showing how the files in this component interact or what flow they represent.
|
|
461
|
+
CRITICAL INSTRUCTION: When creating Mermaid diagrams, you MUST wrap node labels in double quotes if they contain any special characters (like parentheses, brackets, or strange punctuation). For example, use \`NodeID["Text with (parentheses)"]\` instead of \`NodeID[Text with (parentheses)]\`.
|
|
336
462
|
|
|
337
463
|
File Summaries:
|
|
338
464
|
${fileSummaries.map((f) => `### File: ${f.path}\n${f.features}`).join("\n\n")}
|
|
@@ -383,6 +509,7 @@ async function extractGlobalArchitecture(componentSummaries) {
|
|
|
383
509
|
1. Write an Executive Summary of what the entire codebase does.
|
|
384
510
|
2. Outline the major pillars/domains of the application.
|
|
385
511
|
3. Generate a high-level Mermaid.js Architecture Diagram showing how the main components interact.
|
|
512
|
+
CRITICAL INSTRUCTION: When creating Mermaid diagrams, you MUST wrap node labels in double quotes if they contain any special characters (like parentheses, brackets, or strange punctuation). For example, use \`NodeID["Text with (parentheses)"]\` instead of \`NodeID[Text with (parentheses)]\`.
|
|
386
513
|
|
|
387
514
|
Component Summaries:
|
|
388
515
|
${Object.entries(componentSummaries)
|
|
@@ -588,6 +715,21 @@ async function main() {
|
|
|
588
715
|
return;
|
|
589
716
|
}
|
|
590
717
|
|
|
718
|
+
// --- CONFIRMATION ENQUIRER ---
|
|
719
|
+
const { Confirm } = require("enquirer");
|
|
720
|
+
const prompt = new Confirm({
|
|
721
|
+
name: "proceed",
|
|
722
|
+
message: `Ready to analyze ${targetFiles.length} files. Should we run AI Pre-filtering (Stage 0) before continuing?`,
|
|
723
|
+
});
|
|
724
|
+
|
|
725
|
+
let useAiFilter = false;
|
|
726
|
+
try {
|
|
727
|
+
useAiFilter = await prompt.run();
|
|
728
|
+
} catch (err) {
|
|
729
|
+
console.log("Aborted.");
|
|
730
|
+
return;
|
|
731
|
+
}
|
|
732
|
+
|
|
591
733
|
// --- CACHE INITIALIZATION ---
|
|
592
734
|
const folderName = path.basename(path.resolve(targetDir));
|
|
593
735
|
const outputPath = path.join(process.cwd(), `${folderName}-features.md`);
|
|
@@ -631,14 +773,37 @@ async function main() {
|
|
|
631
773
|
|
|
632
774
|
const tasks = new Listr(
|
|
633
775
|
[
|
|
776
|
+
{
|
|
777
|
+
title: "STAGE 0: AI File Pre-filtering",
|
|
778
|
+
skip: () => !useAiFilter,
|
|
779
|
+
task: async (ctx, task) => {
|
|
780
|
+
task.output = `Analyzing ${targetFiles.length} files...`;
|
|
781
|
+
|
|
782
|
+
if (targetFiles.length === 0) {
|
|
783
|
+
ctx.filteredFiles = [];
|
|
784
|
+
task.title = "STAGE 0: AI File Pre-filtering (No files to filter)";
|
|
785
|
+
return;
|
|
786
|
+
}
|
|
787
|
+
|
|
788
|
+
const filtered = await prefilterFilesWithClaude(
|
|
789
|
+
targetFiles,
|
|
790
|
+
targetDir,
|
|
791
|
+
);
|
|
792
|
+
const removedCount = targetFiles.length - filtered.length;
|
|
793
|
+
|
|
794
|
+
ctx.filteredFiles = filtered;
|
|
795
|
+
task.title = `STAGE 0: AI File Pre-filtering (Removed ${removedCount} trivial files)`;
|
|
796
|
+
},
|
|
797
|
+
},
|
|
634
798
|
{
|
|
635
799
|
title: "Analyzing File Structure",
|
|
636
800
|
task: async (ctx, task) => {
|
|
637
801
|
const structuralData = [];
|
|
638
802
|
let completed = 0;
|
|
639
|
-
const total = targetFiles.length;
|
|
803
|
+
const filesToAnalyze = ctx.filteredFiles || targetFiles;
|
|
804
|
+
const total = filesToAnalyze.length;
|
|
640
805
|
|
|
641
|
-
for (const file of targetFiles) {
|
|
806
|
+
for (const file of filesToAnalyze) {
|
|
642
807
|
if (file.endsWith("index.js") && __dirname === targetDir) {
|
|
643
808
|
completed++;
|
|
644
809
|
continue;
|
|
@@ -823,6 +988,7 @@ module.exports = {
|
|
|
823
988
|
getIgnores,
|
|
824
989
|
initTreeSitter,
|
|
825
990
|
extractStructure,
|
|
991
|
+
prefilterFilesWithClaude,
|
|
826
992
|
extractFeaturesWithClaude,
|
|
827
993
|
extractComponentSummary,
|
|
828
994
|
extractGlobalArchitecture,
|
package/index.test.js
CHANGED
|
@@ -69,8 +69,8 @@ def py_function():
|
|
|
69
69
|
const ig = await getIgnores(testDir);
|
|
70
70
|
|
|
71
71
|
// Defaults
|
|
72
|
-
expect(ig.ignores("node_modules")).toBe(true);
|
|
73
|
-
expect(ig.ignores(".git")).toBe(true);
|
|
72
|
+
expect(ig.ignores("node_modules/test.js")).toBe(true);
|
|
73
|
+
expect(ig.ignores(".git/config")).toBe(true);
|
|
74
74
|
|
|
75
75
|
// Custom rules
|
|
76
76
|
expect(ig.ignores("ignored_folder/file.js")).toBe(true);
|
|
@@ -86,8 +86,8 @@ def py_function():
|
|
|
86
86
|
await fs.mkdir(emptyDir, { recursive: true });
|
|
87
87
|
|
|
88
88
|
const ig = await getIgnores(emptyDir);
|
|
89
|
-
expect(ig.ignores("node_modules")).toBe(true);
|
|
90
|
-
expect(ig.ignores(".git")).toBe(true);
|
|
89
|
+
expect(ig.ignores("node_modules/test.js")).toBe(true);
|
|
90
|
+
expect(ig.ignores(".git/config")).toBe(true);
|
|
91
91
|
expect(ig.ignores("test.log")).toBe(false); // custom rule should be false
|
|
92
92
|
|
|
93
93
|
await fs.rm(emptyDir, { recursive: true, force: true });
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@brunobrise/xfeat",
|
|
3
|
-
"version": "1.1.1",
|
|
4
|
-
"description": "",
|
|
3
|
+
"version": "1.3.0",
|
|
4
|
+
"description": "Automated AI-driven CLI for codebase analysis and feature extraction.",
|
|
5
5
|
"publishConfig": {
|
|
6
6
|
"access": "public"
|
|
7
7
|
},
|
|
@@ -35,6 +35,7 @@
|
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@anthropic-ai/sdk": "^0.78.0",
|
|
37
37
|
"dotenv": "^17.3.1",
|
|
38
|
+
"enquirer": "^2.4.1",
|
|
38
39
|
"fast-glob": "^3.3.3",
|
|
39
40
|
"ignore": "^7.0.5",
|
|
40
41
|
"listr2": "^6.6.1",
|