@brunobrise/xfeat 1.1.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -755,7 +755,7 @@ flowchart TB
755
755
  end
756
756
 
757
757
  subgraph ProviderLayer["LLM Provider Registry"]
758
- PR[Provider Router<br/>(LiteLLM)]
758
+ PR["Provider Router<br/>(LiteLLM)"]
759
759
  end
760
760
 
761
761
  subgraph Security["Security Layer"]
package/index.js CHANGED
@@ -17,21 +17,30 @@ const anthropic = new Anthropic({
17
17
  // Helper: Read and parse .gitignore
18
18
  async function getIgnores(targetDir) {
19
19
  const ig = ignore().add([
20
- "node_modules",
21
- "bower_components",
22
- "vendor",
23
- "venv",
24
- ".venv",
25
- "env",
26
- "__pycache__",
27
- ".tox",
28
- "target",
29
- "packages",
30
- ".gradle",
31
- ".git",
32
- "dist",
33
- "build",
20
+ "node_modules/",
21
+ "bower_components/",
22
+ "vendor/",
23
+ "venv/",
24
+ ".venv/",
25
+ "env/",
26
+ "__pycache__/",
27
+ ".tox/",
28
+ "target/",
29
+ "packages/",
30
+ ".gradle/",
31
+ ".git/",
32
+ "dist/",
33
+ "build/",
34
+ "out/",
34
35
  "*.min.js",
36
+ "android/",
37
+ "ios/",
38
+ ".next/",
39
+ "nextjs/",
40
+ "coverage/",
41
+ "tmp/",
42
+ "temp/",
43
+ ".expo/",
35
44
  ]);
36
45
  try {
37
46
  const gitignoreContent = await fs.readFile(
@@ -42,6 +51,17 @@ async function getIgnores(targetDir) {
42
51
  } catch (err) {
43
52
  // No .gitignore found, proceed with defaults
44
53
  }
54
+
55
+ try {
56
+ const xfeatignoreContent = await fs.readFile(
57
+ path.join(targetDir, ".xfeatignore"),
58
+ "utf8",
59
+ );
60
+ ig.add(xfeatignoreContent);
61
+ } catch (err) {
62
+ // No .xfeatignore found, silently continue
63
+ }
64
+
45
65
  return ig;
46
66
  }
47
67
 
@@ -218,6 +238,111 @@ async function extractStructure(filePath) {
218
238
 
219
239
  // --- LLM Stages ---
220
240
 
241
+ // Stage 0: AI File Pre-filtering
242
/**
 * Stage 0: asks the model which files are worth analyzing and returns that subset.
 *
 * File paths are sent in chunks of 1000, expressed relative to `targetDir`; the
 * model is expected to answer with a JSON array of the relative paths to keep.
 * A chunk keeps ALL of its files when the request fails, when the answer has no
 * text content, or when the answer cannot be parsed into an array.
 *
 * @param {string[]} filePaths - Candidate file paths to filter.
 * @param {string} targetDir - Directory the paths are made relative to in the prompt
 *   and resolved against when reading the model's answer.
 * @returns {Promise<string[]>} The retained entries of `filePaths`.
 */
async function prefilterFilesWithClaude(filePaths, targetDir) {
  const CHUNK_SIZE = 1000;
  const DEFAULT_CONCURRENCY = 5;
  const MAX_RETRIES = 2;

  const chunks = [];
  for (let i = 0; i < filePaths.length; i += CHUNK_SIZE) {
    chunks.push({
      index: Math.floor(i / CHUNK_SIZE),
      files: filePaths.slice(i, i + CHUNK_SIZE),
    });
  }

  const modelToUse =
    process.env.CLAUDE_CODE_SUBAGENT_MODEL ||
    process.env.ANTHROPIC_MODEL ||
    "claude-3-7-sonnet-20250219";

  // A missing, empty, non-numeric or non-positive CONCURRENCY_LIMIT falls back to
  // the default instead of handing NaN/0 to pMap.
  const parsedLimit = Number.parseInt(process.env.CONCURRENCY_LIMIT || "", 10);
  const concurrencyLimit =
    Number.isInteger(parsedLimit) && parsedLimit > 0
      ? parsedLimit
      : DEFAULT_CONCURRENCY;

  const chunkResults = await pMap(
    chunks,
    async (chunk) => {
      const relativePaths = chunk.files.map((f) => path.relative(targetDir, f));

      const prompt = `
You are an expert software architect. You are given a list of file paths from a codebase.
Your task is to filter this list by identifying and EXCLUDING any boilerplate, trivial configuration, auto-generated files, lock files, empty files, non-functional UI assets, or generic/non-core test files that would not contribute meaningfully to a high-level architectural summary of the core system.

Return ONLY a raw JSON array of strings containing the file paths that should be KEPT.
Do not include any explanations, markdown formatting, or backticks. Just the raw JSON array.

File paths (Chunk ${chunk.index + 1} of ${chunks.length}):
${JSON.stringify(relativePaths, null, 2)}
`;

      let attempt = 0;
      let chunkKeptFiles = chunk.files; // Default to keeping all if it fails

      while (attempt <= MAX_RETRIES) {
        try {
          const response = await anthropic.messages.create({
            model: modelToUse,
            max_tokens: 8000,
            temperature: 0.1,
            system:
              "You are a technical analyst. You must return ONLY a raw JSON array of strings (file paths to retain). No markdown formatting.",
            messages: [{ role: "user", content: prompt }],
          });

          const rawText = response.content.find((c) => c.type === "text")?.text;
          if (!rawText) {
            // An answer without text is a failure, not an instruction to drop
            // every file in the chunk; the catch below keeps the whole chunk.
            throw new Error("AI returned no text content.");
          }

          // Strip a surrounding Markdown code fence if the model added one anyway.
          const text = rawText
            .replace(/^\s*```(json)?/i, "")
            .replace(/```\s*$/i, "")
            .trim();

          const keptFilesRelative = JSON.parse(text);
          if (!Array.isArray(keptFilesRelative)) {
            throw new Error("AI did not return an array.");
          }

          // Set lookup keeps the membership test linear in the chunk size.
          const keptFilesAbsolute = new Set(
            keptFilesRelative.map((p) => path.resolve(targetDir, p)),
          );
          chunkKeptFiles = chunk.files.filter((f) =>
            keptFilesAbsolute.has(path.resolve(f)),
          );
          break;
        } catch (err) {
          if (err.status === 429 && attempt < MAX_RETRIES) {
            attempt++;
            // Exponential backoff with jitter, capped at 30 seconds.
            const delay = Math.min(
              Math.pow(2, attempt) * 2000 + Math.random() * 1000,
              30000,
            );
            console.warn(
              `\n⚠️ [AI Pre-filtering] API Rate Limit hit (429). Retrying in ${Math.round(delay / 1000)}s... (Attempt ${attempt}/${MAX_RETRIES})`,
            );
            await new Promise((res) => setTimeout(res, delay));
          } else if (attempt < MAX_RETRIES && err.name === "SyntaxError") {
            // Unparseable JSON: ask again immediately.
            attempt++;
          } else {
            console.warn(
              `\n⚠️ AI filtering failed for chunk ${chunk.index + 1}, falling back to all files in chunk.`,
              err.message,
            );
            break;
          }
        }
      }
      return chunkKeptFiles;
    },
    concurrencyLimit,
  );

  return chunkResults.flat();
}
345
+
221
346
  // Stage 1: File-Level (Micro)
222
347
  async function extractFeaturesWithClaude(structuralData, targetDir) {
223
348
  const fullPath = path.join(targetDir, structuralData.path);
@@ -333,6 +458,7 @@ async function extractComponentSummary(dirName, fileSummaries) {
333
458
  1. What is the overarching purpose of this component?
334
459
  2. What are the core macro-features it provides to the broader system?
335
460
  3. Generate a relevant Mermaid.js diagram (e.g., C4 Context, Sequence, or State) showing how the files in this component interact or what flow they represent.
461
+ CRITICAL INSTRUCTION: When creating Mermaid diagrams, you MUST wrap node labels in double quotes if they contain any special characters (like parentheses, brackets, or strange punctuation). For example, use \`NodeID["Text with (parentheses)"]\` instead of \`NodeID[Text with (parentheses)]\`.
336
462
 
337
463
  File Summaries:
338
464
  ${fileSummaries.map((f) => `### File: ${f.path}\n${f.features}`).join("\n\n")}
@@ -383,6 +509,7 @@ async function extractGlobalArchitecture(componentSummaries) {
383
509
  1. Write an Executive Summary of what the entire codebase does.
384
510
  2. Outline the major pillars/domains of the application.
385
511
  3. Generate a high-level Mermaid.js Architecture Diagram showing how the main components interact.
512
+ CRITICAL INSTRUCTION: When creating Mermaid diagrams, you MUST wrap node labels in double quotes if they contain any special characters (like parentheses, brackets, or strange punctuation). For example, use \`NodeID["Text with (parentheses)"]\` instead of \`NodeID[Text with (parentheses)]\`.
386
513
 
387
514
  Component Summaries:
388
515
  ${Object.entries(componentSummaries)
@@ -588,6 +715,21 @@ async function main() {
588
715
  return;
589
716
  }
590
717
 
718
+ // --- CONFIRMATION ENQUIRER ---
719
+ const { Confirm } = require("enquirer");
720
+ const prompt = new Confirm({
721
+ name: "proceed",
722
+ message: `Ready to analyze ${targetFiles.length} files. Should we run AI Pre-filtering (Stage 0) before continuing?`,
723
+ });
724
+
725
+ let useAiFilter = false;
726
+ try {
727
+ useAiFilter = await prompt.run();
728
+ } catch (err) {
729
+ console.log("Aborted.");
730
+ return;
731
+ }
732
+
591
733
  // --- CACHE INITIALIZATION ---
592
734
  const folderName = path.basename(path.resolve(targetDir));
593
735
  const outputPath = path.join(process.cwd(), `${folderName}-features.md`);
@@ -631,14 +773,37 @@ async function main() {
631
773
 
632
774
  const tasks = new Listr(
633
775
  [
776
+ {
777
+ title: "STAGE 0: AI File Pre-filtering",
778
+ skip: () => !useAiFilter,
779
+ task: async (ctx, task) => {
780
+ task.output = `Analyzing ${targetFiles.length} files...`;
781
+
782
+ if (targetFiles.length === 0) {
783
+ ctx.filteredFiles = [];
784
+ task.title = "STAGE 0: AI File Pre-filtering (No files to filter)";
785
+ return;
786
+ }
787
+
788
+ const filtered = await prefilterFilesWithClaude(
789
+ targetFiles,
790
+ targetDir,
791
+ );
792
+ const removedCount = targetFiles.length - filtered.length;
793
+
794
+ ctx.filteredFiles = filtered;
795
+ task.title = `STAGE 0: AI File Pre-filtering (Removed ${removedCount} trivial files)`;
796
+ },
797
+ },
634
798
  {
635
799
  title: "Analyzing File Structure",
636
800
  task: async (ctx, task) => {
637
801
  const structuralData = [];
638
802
  let completed = 0;
639
- const total = targetFiles.length;
803
+ const filesToAnalyze = ctx.filteredFiles || targetFiles;
804
+ const total = filesToAnalyze.length;
640
805
 
641
- for (const file of targetFiles) {
806
+ for (const file of filesToAnalyze) {
642
807
  if (file.endsWith("index.js") && __dirname === targetDir) {
643
808
  completed++;
644
809
  continue;
@@ -823,6 +988,7 @@ module.exports = {
823
988
  getIgnores,
824
989
  initTreeSitter,
825
990
  extractStructure,
991
+ prefilterFilesWithClaude,
826
992
  extractFeaturesWithClaude,
827
993
  extractComponentSummary,
828
994
  extractGlobalArchitecture,
package/index.test.js CHANGED
@@ -69,8 +69,8 @@ def py_function():
69
69
  const ig = await getIgnores(testDir);
70
70
 
71
71
  // Defaults
72
- expect(ig.ignores("node_modules")).toBe(true);
73
- expect(ig.ignores(".git")).toBe(true);
72
+ expect(ig.ignores("node_modules/test.js")).toBe(true);
73
+ expect(ig.ignores(".git/config")).toBe(true);
74
74
 
75
75
  // Custom rules
76
76
  expect(ig.ignores("ignored_folder/file.js")).toBe(true);
@@ -86,8 +86,8 @@ def py_function():
86
86
  await fs.mkdir(emptyDir, { recursive: true });
87
87
 
88
88
  const ig = await getIgnores(emptyDir);
89
- expect(ig.ignores("node_modules")).toBe(true);
90
- expect(ig.ignores(".git")).toBe(true);
89
+ expect(ig.ignores("node_modules/test.js")).toBe(true);
90
+ expect(ig.ignores(".git/config")).toBe(true);
91
91
  expect(ig.ignores("test.log")).toBe(false); // custom rule should be false
92
92
 
93
93
  await fs.rm(emptyDir, { recursive: true, force: true });
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@brunobrise/xfeat",
3
- "version": "1.1.1",
4
- "description": "",
3
+ "version": "1.3.0",
4
+ "description": "Automated AI-driven CLI for codebase analysis and feature extraction.",
5
5
  "publishConfig": {
6
6
  "access": "public"
7
7
  },
@@ -35,6 +35,7 @@
35
35
  "dependencies": {
36
36
  "@anthropic-ai/sdk": "^0.78.0",
37
37
  "dotenv": "^17.3.1",
38
+ "enquirer": "^2.4.1",
38
39
  "fast-glob": "^3.3.3",
39
40
  "ignore": "^7.0.5",
40
41
  "listr2": "^6.6.1",