npm - @brunobrise/xfeat - Versions diffs - 1.1.1 → 1.3.0 - Mend

@brunobrise/xfeat 1.1.1 → 1.3.0

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.

Files changed (4)

package/examples/nanobot-features.md +1 -1
package/index.js +182 -16
package/index.test.js +4 -4
package/package.json +3 -2

package/examples/nanobot-features.md CHANGED Viewed

@@ -755,7 +755,7 @@ flowchart TB
         end
         subgraph ProviderLayer["LLM Provider Registry"]
-            PR[Provider Router<br/>(LiteLLM)]
+            PR["Provider Router<br/>(LiteLLM)"]
         end
         subgraph Security["Security Layer"]

package/index.js CHANGED Viewed

@@ -17,21 +17,30 @@ const anthropic = new Anthropic({
 // Helper: Read and parse .gitignore
 async function getIgnores(targetDir) {
   const ig = ignore().add([
-    "node_modules",
-    "bower_components",
-    "vendor",
-    "venv",
-    ".venv",
-    "env",
-    "__pycache__",
-    ".tox",
-    "target",
-    "packages",
-    ".gradle",
-    ".git",
-    "dist",
-    "build",
+    "node_modules/",
+    "bower_components/",
+    "vendor/",
+    "venv/",
+    ".venv/",
+    "env/",
+    "__pycache__/",
+    ".tox/",
+    "target/",
+    "packages/",
+    ".gradle/",
+    ".git/",
+    "dist/",
+    "build/",
+    "out/",
     "*.min.js",
+    "android/",
+    "ios/",
+    ".next/",
+    "nextjs/",
+    "coverage/",
+    "tmp/",
+    "temp/",
+    ".expo/",
   ]);
   try {
     const gitignoreContent = await fs.readFile(
@@ -42,6 +51,17 @@ async function getIgnores(targetDir) {
   } catch (err) {
     // No .gitignore found, proceed with defaults
   }
+  try {
+    const xfeatignoreContent = await fs.readFile(
+      path.join(targetDir, ".xfeatignore"),
+      "utf8",
+    );
+    ig.add(xfeatignoreContent);
+  } catch (err) {
+    // No .xfeatignore found, silently continue
+  }
   return ig;
 }
@@ -218,6 +238,111 @@ async function extractStructure(filePath) {
 // --- LLM Stages ---
+// Stage 0: AI File Pre-filtering
+async function prefilterFilesWithClaude(filePaths, targetDir) {
+  const CHUNK_SIZE = 1000;
+  const chunks = [];
+  for (let i = 0; i < filePaths.length; i += CHUNK_SIZE) {
+    chunks.push({
+      index: Math.floor(i / CHUNK_SIZE),
+      files: filePaths.slice(i, i + CHUNK_SIZE),
+    });
+  }
+  const modelToUse =
+    process.env.CLAUDE_CODE_SUBAGENT_MODEL ||
+    process.env.ANTHROPIC_MODEL ||
+    "claude-3-7-sonnet-20250219";
+  const CONCURRENCY_LIMIT = parseInt(process.env.CONCURRENCY_LIMIT || "5", 10);
+  const chunkResults = await pMap(
+    chunks,
+    async (chunk) => {
+      const relativePaths = chunk.files.map((f) => path.relative(targetDir, f));
+      const prompt = `
+      You are an expert software architect. You are given a list of file paths from a codebase.
+      Your task is to filter this list by identifying and EXCLUDING any boilerplate, trivial configuration, auto-generated files, lock files, empty files, non-functional UI assets, or generic/non-core test files that would not contribute meaningfully to a high-level architectural summary of the core system.
+      Return ONLY a raw JSON array of strings containing the file paths that should be KEPT.
+      Do not include any explanations, markdown formatting, or backticks. Just the raw JSON array.
+      File paths (Chunk ${chunk.index + 1} of ${chunks.length}):
+      ${JSON.stringify(relativePaths, null, 2)}
+      `;
+      let attempt = 0;
+      const explicitMaxRetries = 2;
+      let chunkKeptFiles = chunk.files; // Default to keeping all if it fails
+      while (attempt <= explicitMaxRetries) {
+        try {
+          const response = await anthropic.messages.create({
+            model: modelToUse,
+            max_tokens: 8000,
+            temperature: 0.1,
+            system:
+              "You are a technical analyst. You must return ONLY a raw JSON array of strings (file paths to retain). No markdown formatting.",
+            messages: [{ role: "user", content: prompt }],
+          });
+          let text =
+            response.content.find((c) => c.type === "text")?.text || "[]";
+          text = text
+            .replace(/^\s*```(json)?/i, "")
+            .replace(/```\s*$/i, "")
+            .trim();
+          const keptFilesRelative = JSON.parse(text);
+          if (!Array.isArray(keptFilesRelative)) {
+            throw new Error("AI did not return an array.");
+          }
+          const keptFilesAbsolute = keptFilesRelative.map((p) =>
+            path.resolve(targetDir, p),
+          );
+          chunkKeptFiles = chunk.files.filter((f) =>
+            keptFilesAbsolute.includes(path.resolve(f)),
+          );
+          break;
+        } catch (err) {
+          if (err.status === 429 && attempt < explicitMaxRetries) {
+            attempt++;
+            const delay = Math.min(
+              Math.pow(2, attempt) * 2000 + Math.random() * 1000,
+              30000,
+            );
+            console.warn(
+              `\n⚠️  [AI Pre-filtering] API Rate Limit hit (429). Retrying in ${Math.round(delay / 1000)}s... (Attempt ${attempt}/${explicitMaxRetries})`,
+            );
+            await new Promise((res) => setTimeout(res, delay));
+          } else if (
+            attempt < explicitMaxRetries &&
+            err.name === "SyntaxError"
+          ) {
+            attempt++;
+          } else {
+            console.warn(
+              `\n⚠️  AI filtering failed for chunk ${chunk.index + 1}, falling back to all files in chunk.`,
+              err.message,
+            );
+            break;
+          }
+        }
+      }
+      return chunkKeptFiles;
+    },
+    CONCURRENCY_LIMIT,
+  );
+  let allKeptFiles = [];
+  for (const res of chunkResults) {
+    allKeptFiles = allKeptFiles.concat(res);
+  }
+  return allKeptFiles;
+}
 // Stage 1: File-Level (Micro)
 async function extractFeaturesWithClaude(structuralData, targetDir) {
   const fullPath = path.join(targetDir, structuralData.path);
@@ -333,6 +458,7 @@ async function extractComponentSummary(dirName, fileSummaries) {
   1. What is the overarching purpose of this component?
   2. What are the core macro-features it provides to the broader system?
   3. Generate a relevant Mermaid.js diagram (e.g., C4 Context, Sequence, or State) showing how the files in this component interact or what flow they represent.
+  CRITICAL INSTRUCTION: When creating Mermaid diagrams, you MUST wrap node labels in double quotes if they contain any special characters (like parentheses, brackets, or strange punctuation). For example, use \`NodeID["Text with (parentheses)"]\` instead of \`NodeID[Text with (parentheses)]\`.
   File Summaries:
   ${fileSummaries.map((f) => `### File: ${f.path}\n${f.features}`).join("\n\n")}
@@ -383,6 +509,7 @@ async function extractGlobalArchitecture(componentSummaries) {
     1. Write an Executive Summary of what the entire codebase does.
     2. Outline the major pillars/domains of the application.
     3. Generate a high-level Mermaid.js Architecture Diagram showing how the main components interact.
+    CRITICAL INSTRUCTION: When creating Mermaid diagrams, you MUST wrap node labels in double quotes if they contain any special characters (like parentheses, brackets, or strange punctuation). For example, use \`NodeID["Text with (parentheses)"]\` instead of \`NodeID[Text with (parentheses)]\`.
     Component Summaries:
     ${Object.entries(componentSummaries)
@@ -588,6 +715,21 @@ async function main() {
     return;
   }
+  // --- CONFIRMATION ENQUIRER ---
+  const { Confirm } = require("enquirer");
+  const prompt = new Confirm({
+    name: "proceed",
+    message: `Ready to analyze ${targetFiles.length} files. Should we run AI Pre-filtering (Stage 0) before continuing?`,
+  });
+  let useAiFilter = false;
+  try {
+    useAiFilter = await prompt.run();
+  } catch (err) {
+    console.log("Aborted.");
+    return;
+  }
   // --- CACHE INITIALIZATION ---
   const folderName = path.basename(path.resolve(targetDir));
   const outputPath = path.join(process.cwd(), `${folderName}-features.md`);
@@ -631,14 +773,37 @@ async function main() {
   const tasks = new Listr(
     [
+      {
+        title: "STAGE 0: AI File Pre-filtering",
+        skip: () => !useAiFilter,
+        task: async (ctx, task) => {
+          task.output = `Analyzing ${targetFiles.length} files...`;
+          if (targetFiles.length === 0) {
+            ctx.filteredFiles = [];
+            task.title = "STAGE 0: AI File Pre-filtering (No files to filter)";
+            return;
+          }
+          const filtered = await prefilterFilesWithClaude(
+            targetFiles,
+            targetDir,
+          );
+          const removedCount = targetFiles.length - filtered.length;
+          ctx.filteredFiles = filtered;
+          task.title = `STAGE 0: AI File Pre-filtering (Removed ${removedCount} trivial files)`;
+        },
+      },
       {
         title: "Analyzing File Structure",
         task: async (ctx, task) => {
           const structuralData = [];
           let completed = 0;
-          const total = targetFiles.length;
+          const filesToAnalyze = ctx.filteredFiles || targetFiles;
+          const total = filesToAnalyze.length;
-          for (const file of targetFiles) {
+          for (const file of filesToAnalyze) {
             if (file.endsWith("index.js") && __dirname === targetDir) {
               completed++;
               continue;
@@ -823,6 +988,7 @@ module.exports = {
   getIgnores,
   initTreeSitter,
   extractStructure,
+  prefilterFilesWithClaude,
   extractFeaturesWithClaude,
   extractComponentSummary,
   extractGlobalArchitecture,

package/index.test.js CHANGED Viewed

@@ -69,8 +69,8 @@ def py_function():
       const ig = await getIgnores(testDir);
       // Defaults
-      expect(ig.ignores("node_modules")).toBe(true);
-      expect(ig.ignores(".git")).toBe(true);
+      expect(ig.ignores("node_modules/test.js")).toBe(true);
+      expect(ig.ignores(".git/config")).toBe(true);
       // Custom rules
       expect(ig.ignores("ignored_folder/file.js")).toBe(true);
@@ -86,8 +86,8 @@ def py_function():
       await fs.mkdir(emptyDir, { recursive: true });
       const ig = await getIgnores(emptyDir);
-      expect(ig.ignores("node_modules")).toBe(true);
-      expect(ig.ignores(".git")).toBe(true);
+      expect(ig.ignores("node_modules/test.js")).toBe(true);
+      expect(ig.ignores(".git/config")).toBe(true);
       expect(ig.ignores("test.log")).toBe(false); // custom rule should be false
       await fs.rm(emptyDir, { recursive: true, force: true });

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@brunobrise/xfeat",
-  "version": "1.1.1",
-  "description": "",
+  "version": "1.3.0",
+  "description": "Automated AI-driven CLI for codebase analysis and feature extraction.",
   "publishConfig": {
     "access": "public"
   },
@@ -35,6 +35,7 @@
   "dependencies": {
     "@anthropic-ai/sdk": "^0.78.0",
     "dotenv": "^17.3.1",
+    "enquirer": "^2.4.1",
     "fast-glob": "^3.3.3",
     "ignore": "^7.0.5",
     "listr2": "^6.6.1",