@merlean/analyzer 1.2.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/lib/analyzer.js +142 -82
  2. package/package.json +1 -1
package/lib/analyzer.js CHANGED
@@ -1,23 +1,32 @@
1
1
  /**
2
- * Codebase Scanner
2
+ * Frontend-focused Codebase Scanner
3
3
  *
4
- * Scans codebase and prepares file contents for backend analysis.
5
- * NO LLM calls here - that happens on the backend.
4
+ * Scans FRONTEND code to learn how it communicates with the backend.
5
+ * Extracts: fetch(), axios, $.ajax, API calls, form submissions
6
+ *
7
+ * This is what the bot needs - it runs in the browser and should know
8
+ * what API calls the frontend already makes.
6
9
  */
7
10
 
8
11
  const fs = require('fs');
9
12
  const path = require('path');
10
13
  const { glob } = require('glob');
11
14
 
12
- // File patterns to scan
13
- const SCAN_PATTERNS = [
15
+ // Frontend file patterns (prioritize frontend code)
16
+ const FRONTEND_PATTERNS = [
14
17
  '**/*.js',
15
- '**/*.ts',
16
18
  '**/*.jsx',
19
+ '**/*.ts',
17
20
  '**/*.tsx',
18
- '**/*.php',
19
- '**/*.py',
20
- '**/*.rb'
21
+ '**/*.vue',
22
+ '**/*.svelte',
23
+ '**/app.js',
24
+ '**/main.js',
25
+ '**/index.js',
26
+ '**/*api*.js',
27
+ '**/*service*.js',
28
+ '**/*fetch*.js',
29
+ '**/*http*.js'
21
30
  ];
22
31
 
23
32
  // Directories to ignore
@@ -30,23 +39,28 @@ const IGNORE_PATTERNS = [
30
39
  '**/__pycache__/**',
31
40
  '**/venv/**',
32
41
  '**/*.min.js',
33
- '**/*.map'
42
+ '**/*.map',
43
+ '**/server.js', // Skip backend files
44
+ '**/server/**',
45
+ '**/backend/**',
46
+ '**/api/**', // Skip backend API folders
47
+ '**/controllers/**'
34
48
  ];
35
49
 
36
50
  // Keywords to prioritize files
37
51
  const PRIORITY_KEYWORDS = [
38
- 'route', 'router', 'controller', 'api', 'endpoint',
39
- 'form', 'submit', 'action', 'handler', 'service'
52
+ 'fetch', 'axios', 'api', 'service', 'http', 'request',
53
+ 'ajax', 'client', 'frontend', 'app', 'main', 'store'
40
54
  ];
41
55
 
42
56
  /**
43
- * Scan codebase and collect file contents
57
+ * Scan codebase and collect frontend API patterns
44
58
  */
45
59
  async function scanCodebase(codebasePath) {
46
- console.log(' Scanning files...');
60
+ console.log(' Scanning frontend files...');
47
61
 
48
62
  // Get files to scan
49
- const files = await glob(SCAN_PATTERNS, {
63
+ const files = await glob(FRONTEND_PATTERNS, {
50
64
  cwd: codebasePath,
51
65
  ignore: IGNORE_PATTERNS,
52
66
  absolute: true
@@ -54,116 +68,162 @@ async function scanCodebase(codebasePath) {
54
68
 
55
69
  console.log(` Found ${files.length} files`);
56
70
 
57
- // Prioritize and limit files
71
+ // Prioritize frontend-focused files
58
72
  const prioritizedFiles = prioritizeFiles(files, codebasePath);
59
- const filesToAnalyze = prioritizedFiles.slice(0, 50); // Limit for performance
73
+ const filesToAnalyze = prioritizedFiles.slice(0, 30); // Fewer files, but more content
60
74
 
61
- console.log(` Preparing ${filesToAnalyze.length} priority files...`);
75
+ console.log(` Analyzing ${filesToAnalyze.length} frontend files...`);
62
76
 
63
- // Read and prepare file contents
77
+ // Read and extract API patterns from files
64
78
  const fileContents = [];
65
79
  for (const file of filesToAnalyze) {
66
80
  try {
67
81
  const content = fs.readFileSync(file, 'utf-8');
68
82
  const relativePath = path.relative(codebasePath, file);
69
83
 
70
- // Smart extraction: if file is large, extract route-like patterns
71
- let extractedContent;
72
- if (content.length > 8000) {
73
- // For large files, extract route definitions and API patterns
74
- extractedContent = extractRoutePatterns(content, relativePath);
75
- } else {
76
- // For smaller files, include more content
77
- extractedContent = content.slice(0, 8000);
78
- }
84
+ // Extract API calls from the file
85
+ const extracted = extractApiPatterns(content, relativePath);
79
86
 
80
- fileContents.push({
81
- path: relativePath,
82
- content: extractedContent
83
- });
87
+ if (extracted.hasApiCalls) {
88
+ fileContents.push({
89
+ path: relativePath,
90
+ content: extracted.content
91
+ });
92
+ }
84
93
  } catch (error) {
85
94
  // Skip files that can't be read
86
95
  }
87
96
  }
88
97
 
98
+ console.log(` Found API patterns in ${fileContents.length} files`);
99
+
89
100
  return fileContents;
90
101
  }
91
102
 
92
103
  /**
93
- * Extract route patterns from large files
104
+ * Extract API call patterns from file content
94
105
  */
95
- function extractRoutePatterns(content, filePath) {
106
+ function extractApiPatterns(content, filePath) {
107
+ const apiPatterns = [];
96
108
  const lines = content.split('\n');
97
- const relevantLines = [];
98
109
 
99
- // Patterns that indicate API routes/endpoints
100
- const routePatterns = [
101
- /app\.(get|post|put|patch|delete|use)\s*\(/i,
102
- /router\.(get|post|put|patch|delete|use)\s*\(/i,
103
- /Route::(get|post|put|patch|delete)\s*\(/i,
104
- /@(Get|Post|Put|Patch|Delete|RequestMapping)/i,
105
- /def\s+(get|post|put|patch|delete|index|create|update|destroy)/i,
106
- /function\s+\w+\s*\(\s*(req|request)/i,
107
- /fetch\s*\(/i,
108
- /axios\./i,
109
- /api['"]\s*:/i,
110
- /endpoint/i,
111
- /\/api\//i
110
+ // Patterns that indicate API calls
111
+ const patterns = [
112
+ // fetch() calls
113
+ { regex: /fetch\s*\(\s*[`'"](.*?)[`'"]/g, type: 'fetch' },
114
+ { regex: /fetch\s*\(\s*`([^`]*)`/g, type: 'fetch-template' },
115
+ { regex: /fetch\s*\(\s*(['"])?\/api\//g, type: 'fetch-api' },
116
+
117
+ // axios calls
118
+ { regex: /axios\.(get|post|put|patch|delete)\s*\(\s*[`'"](.*?)[`'"]/g, type: 'axios' },
119
+ { regex: /axios\s*\(\s*\{[^}]*url\s*:\s*[`'"](.*?)[`'"]/g, type: 'axios-config' },
120
+
121
+ // jQuery ajax
122
+ { regex: /\$\.(ajax|get|post)\s*\(\s*[`'"](.*?)[`'"]/g, type: 'jquery' },
123
+
124
+ // Generic API URLs
125
+ { regex: /['"`](\/api\/[^'"`\s]+)['"`]/g, type: 'api-url' },
126
+ { regex: /['"`](https?:\/\/[^'"`\s]*\/api[^'"`\s]*)['"`]/g, type: 'full-url' },
127
+
128
+ // Method + URL patterns
129
+ { regex: /(GET|POST|PUT|PATCH|DELETE)\s*[,:]?\s*['"`](\/[^'"`]+)['"`]/gi, type: 'method-url' },
112
130
  ];
131
+
132
+ let hasApiCalls = false;
133
+ const extractedBlocks = [];
113
134
 
114
- let inRouteBlock = false;
115
- let braceCount = 0;
135
+ // FIRST: Extract API base URL definitions (critical for resolving paths)
136
+ const baseUrlDefinitions = [];
137
+ for (let i = 0; i < lines.length; i++) {
138
+ const line = lines[i];
139
+ // Match: const API_BASE = ..., const baseURL = ..., const apiUrl = ..., etc.
140
+ if (/^\s*(const|let|var)\s+(API_BASE|API_URL|BASE_URL|baseURL|apiUrl|apiBase|API_ENDPOINT)/i.test(line)) {
141
+ baseUrlDefinitions.push(`${i + 1}: ${line}`);
142
+ }
143
+ // Also match: axios.defaults.baseURL = ...
144
+ if (/baseURL\s*[:=]/i.test(line)) {
145
+ baseUrlDefinitions.push(`${i + 1}: ${line}`);
146
+ }
147
+ }
116
148
 
149
+ if (baseUrlDefinitions.length > 0) {
150
+ extractedBlocks.push('// API BASE URL DEFINITIONS (use these to resolve full paths):\n' + baseUrlDefinitions.join('\n'));
151
+ hasApiCalls = true;
152
+ }
153
+
154
+ // Line-by-line extraction with context
117
155
  for (let i = 0; i < lines.length; i++) {
118
156
  const line = lines[i];
119
157
 
120
- // Check if line matches any route pattern
121
- const isRouteLine = routePatterns.some(pattern => pattern.test(line));
158
+ // Check for API patterns
159
+ const hasPattern = patterns.some(p => p.regex.test(line));
160
+ // Reset regex lastIndex
161
+ patterns.forEach(p => p.regex.lastIndex = 0);
122
162
 
123
- if (isRouteLine) {
124
- // Include context: 2 lines before
125
- for (let j = Math.max(0, i - 2); j < i; j++) {
126
- if (!relevantLines.includes(lines[j])) {
127
- relevantLines.push(`// Line ${j + 1}: ${lines[j]}`);
128
- }
129
- }
130
- relevantLines.push(`// Line ${i + 1}: ${line}`);
131
- inRouteBlock = true;
132
- braceCount = (line.match(/{/g) || []).length - (line.match(/}/g) || []).length;
133
- } else if (inRouteBlock) {
134
- // Continue capturing the route handler
135
- relevantLines.push(line);
136
- braceCount += (line.match(/{/g) || []).length - (line.match(/}/g) || []).length;
163
+ // Also check for common API keywords
164
+ const hasKeyword = /fetch|axios|\.ajax|\.get\(|\.post\(|\.put\(|\.delete\(|\/api\/|endpoint/i.test(line);
165
+
166
+ // Check if this is a POST/PUT request - need more context for body structure
167
+ const isPostRequest = /method:\s*['"]POST|\.post\(|method:\s*['"]PUT|\.put\(/i.test(line);
168
+
169
+ if (hasPattern || hasKeyword) {
170
+ hasApiCalls = true;
137
171
 
138
- // End of route block
139
- if (braceCount <= 0) {
140
- inRouteBlock = false;
141
- relevantLines.push('// ---');
142
- }
172
+ // Get MORE context for POST/PUT requests to capture body structure definitions
173
+ // Body objects are often defined 15-25 lines before the fetch call
174
+ const contextBefore = isPostRequest ? 25 : 3; // More context for POST
175
+ const contextAfter = isPostRequest ? 10 : 5;
143
176
 
144
- // Safety limit per block
145
- if (relevantLines.length > 200) {
146
- inRouteBlock = false;
147
- }
177
+ const startLine = Math.max(0, i - contextBefore);
178
+ const endLine = Math.min(lines.length - 1, i + contextAfter);
179
+
180
+ const block = lines.slice(startLine, endLine + 1)
181
+ .map((l, idx) => `${startLine + idx + 1}: ${l}`)
182
+ .join('\n');
183
+
184
+ extractedBlocks.push(block);
185
+
186
+ // Skip ahead to avoid duplicates
187
+ i = endLine;
148
188
  }
149
189
  }
150
-
151
- // If we found relevant lines, return them; otherwise return truncated content
152
- if (relevantLines.length > 10) {
153
- return `// Extracted route patterns from ${filePath} (${lines.length} lines total)\n\n${relevantLines.join('\n')}`;
190
+
191
+ // If we found API patterns, return extracted content
192
+ if (hasApiCalls && extractedBlocks.length > 0) {
193
+ return {
194
+ hasApiCalls: true,
195
+ content: `// File: ${filePath}\n// API patterns found:\n\n${extractedBlocks.join('\n\n// ---\n\n')}`
196
+ };
154
197
  }
155
-
156
- return content.slice(0, 8000);
198
+
199
+ // Fallback: include first 5000 chars if file looks relevant
200
+ if (/api|fetch|axios|service|http/i.test(filePath)) {
201
+ return {
202
+ hasApiCalls: true,
203
+ content: `// File: ${filePath}\n${content.slice(0, 5000)}`
204
+ };
205
+ }
206
+
207
+ return { hasApiCalls: false, content: '' };
157
208
  }
158
209
 
159
210
  /**
160
- * Prioritize files based on keywords in path/name
211
+ * Prioritize frontend files based on keywords
161
212
  */
162
213
  function prioritizeFiles(files, basePath) {
163
214
  return files.sort((a, b) => {
164
215
  const aPath = path.relative(basePath, a).toLowerCase();
165
216
  const bPath = path.relative(basePath, b).toLowerCase();
166
217
 
218
+ // Deprioritize test files
219
+ if (aPath.includes('test') || aPath.includes('spec')) return 1;
220
+ if (bPath.includes('test') || bPath.includes('spec')) return -1;
221
+
222
+ // Prioritize src/frontend folders
223
+ if (aPath.includes('src/') || aPath.includes('frontend/')) {
224
+ if (!bPath.includes('src/') && !bPath.includes('frontend/')) return -1;
225
+ }
226
+
167
227
  const aScore = PRIORITY_KEYWORDS.reduce((score, kw) =>
168
228
  aPath.includes(kw) ? score + 1 : score, 0);
169
229
  const bScore = PRIORITY_KEYWORDS.reduce((score, kw) =>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@merlean/analyzer",
3
- "version": "1.2.0",
3
+ "version": "2.1.0",
4
4
  "description": "AI Bot codebase analyzer - generates site maps for AI assistant integration",
5
5
  "keywords": ["ai", "bot", "analyzer", "claude", "anthropic", "widget"],
6
6
  "author": "zmaren",