@merlean/analyzer 2.1.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/bin/cli.js +47 -12
  2. package/lib/analyzer.js +1231 -123
  3. package/package.json +1 -1
package/lib/analyzer.js CHANGED
@@ -1,66 +1,172 @@
1
1
  /**
2
- * Frontend-focused Codebase Scanner
2
+ * Language & Framework Agnostic Codebase Scanner
3
3
  *
4
- * Scans FRONTEND code to learn how it communicates with the backend.
5
- * Extracts: fetch(), axios, $.ajax, API calls, form submissions
4
+ * Scans ANY codebase (frontend or backend, any language) to extract API patterns:
6
5
  *
7
- * This is what the bot needs - it runs in the browser and should know
8
- * what API calls the frontend already makes.
6
+ * JavaScript/TypeScript:
7
+ * - Express/Fastify/Koa/Hapi route definitions
8
+ * - NestJS decorators (@Get, @Post, @Body, @Query, etc.)
9
+ * - Frontend HTTP calls (fetch, axios, HttpClient, etc.)
10
+ *
11
+ * PHP:
12
+ * - CodeIgniter routes ($routes->get(), $routes->post())
13
+ * - Laravel routes (Route::get(), Route::post())
14
+ * - Symfony annotations (@Route)
15
+ *
16
+ * Python:
17
+ * - Flask routes (@app.route)
18
+ * - FastAPI routes (@app.get, @router.post)
19
+ * - Django URLs (path(), url())
20
+ *
21
+ * Ruby:
22
+ * - Rails routes (get, post, resources)
23
+ * - Sinatra routes (get '/path')
24
+ *
25
+ * Go:
26
+ * - Gin routes (r.GET, r.POST)
27
+ * - Echo routes (e.GET, e.POST)
28
+ * - Chi routes (r.Get, r.Post)
29
+ *
30
+ * Java/Kotlin:
31
+ * - Spring Boot (@GetMapping, @PostMapping, @RequestMapping)
32
+ *
33
+ * Plus: Swagger/OpenAPI annotations, request body schemas, DTOs
34
+ *
35
+ * The goal is to understand what APIs exist, their parameters,
36
+ * and body structures - regardless of framework, language, or code style.
9
37
  */
10
38
 
11
39
  const fs = require('fs');
12
40
  const path = require('path');
13
41
  const { glob } = require('glob');
14
42
 
15
- // Frontend file patterns (prioritize frontend code)
16
- const FRONTEND_PATTERNS = [
43
+ // File patterns to scan - include ALL common backend/frontend languages
44
+ const FILE_PATTERNS = [
45
+ // JavaScript/TypeScript
17
46
  '**/*.js',
18
47
  '**/*.jsx',
19
48
  '**/*.ts',
20
49
  '**/*.tsx',
21
50
  '**/*.vue',
22
51
  '**/*.svelte',
23
- '**/app.js',
24
- '**/main.js',
25
- '**/index.js',
26
- '**/*api*.js',
27
- '**/*service*.js',
28
- '**/*fetch*.js',
29
- '**/*http*.js'
52
+ '**/*.mjs',
53
+ '**/*.cjs',
54
+ // PHP
55
+ '**/*.php',
56
+ // Python
57
+ '**/*.py',
58
+ // Ruby
59
+ '**/*.rb',
60
+ // Go
61
+ '**/*.go',
62
+ // Java/Kotlin
63
+ '**/*.java',
64
+ '**/*.kt',
65
+ // C#
66
+ '**/*.cs',
67
+ // Rust
68
+ '**/*.rs'
30
69
  ];
31
70
 
32
- // Directories to ignore
71
+ // Only ignore truly irrelevant directories
33
72
  const IGNORE_PATTERNS = [
34
73
  '**/node_modules/**',
35
74
  '**/vendor/**',
36
75
  '**/.git/**',
37
76
  '**/dist/**',
38
77
  '**/build/**',
78
+ '**/coverage/**',
39
79
  '**/__pycache__/**',
40
80
  '**/venv/**',
41
81
  '**/*.min.js',
42
82
  '**/*.map',
43
- '**/server.js', // Skip backend files
44
- '**/server/**',
45
- '**/backend/**',
46
- '**/api/**', // Skip backend API folders
47
- '**/controllers/**'
83
+ '**/*.d.ts', // TypeScript declaration files
84
+ '**/*.spec.ts', // Test files
85
+ '**/*.spec.js',
86
+ '**/*.test.ts',
87
+ '**/*.test.js',
88
+ '**/__tests__/**',
89
+ '**/__mocks__/**'
48
90
  ];
49
91
 
50
- // Keywords to prioritize files
51
- const PRIORITY_KEYWORDS = [
52
- 'fetch', 'axios', 'api', 'service', 'http', 'request',
53
- 'ajax', 'client', 'frontend', 'app', 'main', 'store'
92
+ // Files that are highly likely to contain API definitions (any language)
93
+ const HIGH_PRIORITY_PATTERNS = [
94
+ // Generic route/controller patterns (any extension)
95
+ /routes?\.(ts|js|php|py|rb|go|java|kt|cs)$/i,
96
+ /router\.(ts|js|php|py|rb|go|java|kt|cs)$/i,
97
+ /controller\.(ts|js|php|py|rb|go|java|kt|cs)$/i,
98
+ /\.controller\.(ts|js|php|py|rb|go|java|kt|cs)$/i,
99
+ /Controller\.(php|java|kt|cs)$/, // PascalCase controllers (PHP, Java, C#)
100
+ /service\.(ts|js|php|py|rb|go|java|kt|cs)$/i,
101
+ /\.service\.(ts|js|php|py|rb|go|java|kt|cs)$/i,
102
+ /api\.(ts|js|php|py|rb|go|java|kt|cs)$/i,
103
+ /endpoints?\.(ts|js|php|py|rb|go|java|kt|cs)$/i,
104
+ /http\.(ts|js|php|py|rb|go)$/i,
105
+ /client\.(ts|js|php|py|rb|go)$/i,
106
+
107
+ // TypeScript/JavaScript specific
108
+ /dto\.ts$/i,
109
+ /\.dto\.ts$/i,
110
+ /interfaces?\.ts$/i,
111
+ /types?\.ts$/i,
112
+ /schema\.ts$/i,
113
+ /schemas?\.ts$/i,
114
+ /validation\.ts$/i,
115
+
116
+ // PHP specific (CodeIgniter, Laravel, Symfony)
117
+ /Routes\.php$/i,
118
+ /web\.php$/i, // Laravel web routes
119
+ /api\.php$/i, // Laravel API routes
120
+ /Config\/Routes\.php$/i, // CodeIgniter routes
121
+ // CodeIgniter convention: application/controllers/**/*.php are ALL route controllers
122
+ /application\/controllers\/.*\.php$/i,
123
+ /app\/Controllers\/.*\.php$/i, // CodeIgniter 4
124
+ /app\/Http\/Controllers\/.*\.php$/i, // Laravel controllers
125
+ // Libraries and models that define data structures
126
+ /application\/libraries\/.*\.php$/i,
127
+ /application\/models\/.*\.php$/i,
128
+ /app\/Models\/.*\.php$/i,
129
+ /app\/Services\/.*\.php$/i,
130
+
131
+ // Python specific (Flask, FastAPI, Django)
132
+ /views?\.py$/i,
133
+ /urls\.py$/i, // Django URLs
134
+ /routers?\.py$/i, // FastAPI routers
135
+
136
+ // Ruby specific (Rails)
137
+ /routes\.rb$/i,
138
+ /_controller\.rb$/i,
139
+
140
+ // Go specific
141
+ /handlers?\.go$/i,
142
+ /routes?\.go$/i,
143
+
144
+ // Java/Kotlin specific (Spring Boot)
145
+ /Controller\.java$/,
146
+ /Controller\.kt$/,
147
+ /RestController\.java$/,
148
+ /RestController\.kt$/
149
+ ];
150
+
151
+ // Medium priority - might contain API patterns
152
+ const MEDIUM_PRIORITY_PATTERNS = [
153
+ /index\.(ts|js|php|py|rb|go)$/i,
154
+ /app\.(ts|js|php|py|rb|go)$/i,
155
+ /main\.(ts|js|php|py|rb|go|java|kt)$/i,
156
+ /server\.(ts|js|php|py|rb|go)$/i,
157
+ /Application\.(java|kt)$/, // Spring Boot main class
158
+ /bootstrap\.php$/i, // PHP bootstrap files
159
+ /kernel\.php$/i, // Laravel/Symfony kernel
54
160
  ];
55
161
 
56
162
  /**
57
- * Scan codebase and collect frontend API patterns
163
+ * Scan codebase and collect API patterns from any framework
58
164
  */
59
165
  async function scanCodebase(codebasePath) {
60
- console.log(' Scanning frontend files...');
166
+ console.log(' Scanning files...');
61
167
 
62
- // Get files to scan
63
- const files = await glob(FRONTEND_PATTERNS, {
168
+ // Get all matching files
169
+ const files = await glob(FILE_PATTERNS, {
64
170
  cwd: codebasePath,
65
171
  ignore: IGNORE_PATTERNS,
66
172
  absolute: true
@@ -68,23 +174,57 @@ async function scanCodebase(codebasePath) {
68
174
 
69
175
  console.log(` Found ${files.length} files`);
70
176
 
71
- // Prioritize frontend-focused files
72
- const prioritizedFiles = prioritizeFiles(files, codebasePath);
73
- const filesToAnalyze = prioritizedFiles.slice(0, 30); // Fewer files, but more content
177
+ // Categorize and prioritize files
178
+ const { highPriority, mediumPriority, other } = categorizeFiles(files, codebasePath);
179
+
180
+ console.log(` High priority: ${highPriority.length}, Medium: ${mediumPriority.length}, Other: ${other.length}`);
181
+
182
+ // Analyze high priority files first (routes, controllers, services, DTOs)
183
+ // Then medium priority, then scan others for API patterns
184
+ // Increased limits to ensure all controllers are scanned
185
+ const filesToAnalyze = [
186
+ ...highPriority, // ALL high priority files (controllers, routes, etc.)
187
+ ...mediumPriority.slice(0, 30),
188
+ ...other.slice(0, 75)
189
+ ];
74
190
 
75
- console.log(` Analyzing ${filesToAnalyze.length} frontend files...`);
191
+ console.log(` Analyzing ${filesToAnalyze.length} files for API patterns...`);
76
192
 
77
- // Read and extract API patterns from files
193
+ // PHASE 1: Pre-scan all files to discover JSON structures
194
+ // This ensures structures found in one file can be used for others
195
+ console.log(` Phase 1: Discovering JSON structures...`);
196
+ for (const file of filesToAnalyze) {
197
+ try {
198
+ const content = fs.readFileSync(file, 'utf-8');
199
+ // Look for structure-defining patterns (json_decode + array access)
200
+ preDiscoverStructures(content);
201
+ } catch (error) {
202
+ // Skip files that can't be read
203
+ }
204
+ }
205
+ console.log(` Discovered ${discoveredStructures.size} reusable structures`);
206
+
207
+ // PHASE 2: Extract routes and apply discovered structures
78
208
  const fileContents = [];
209
+ const preExtractedRoutes = []; // Routes extracted directly from conventions
210
+ let filesWithPatterns = 0;
211
+
79
212
  for (const file of filesToAnalyze) {
80
213
  try {
81
214
  const content = fs.readFileSync(file, 'utf-8');
82
215
  const relativePath = path.relative(codebasePath, file);
83
216
 
84
- // Extract API calls from the file
85
- const extracted = extractApiPatterns(content, relativePath);
217
+ // Pre-extract routes from convention-based frameworks (CodeIgniter, etc.)
218
+ const conventionRoutes = extractConventionRoutes(content, relativePath);
219
+ if (conventionRoutes.length > 0) {
220
+ preExtractedRoutes.push(...conventionRoutes);
221
+ }
222
+
223
+ // Extract API patterns from the file
224
+ const extracted = extractApiPatterns(content, relativePath, file);
86
225
 
87
- if (extracted.hasApiCalls) {
226
+ if (extracted.hasApiPatterns) {
227
+ filesWithPatterns++;
88
228
  fileContents.push({
89
229
  path: relativePath,
90
230
  content: extracted.content
@@ -95,142 +235,1110 @@ async function scanCodebase(codebasePath) {
95
235
  }
96
236
  }
97
237
 
98
- console.log(` Found API patterns in ${fileContents.length} files`);
238
+ console.log(` Found API patterns in ${filesWithPatterns} files`);
239
+ if (preExtractedRoutes.length > 0) {
240
+ console.log(` Pre-extracted ${preExtractedRoutes.length} convention-based routes`);
241
+ }
99
242
 
100
- return fileContents;
243
+ return { files: fileContents, preExtractedRoutes };
101
244
  }
102
245
 
103
246
  /**
104
- * Extract API call patterns from file content
247
+ * Extract routes from convention-based frameworks
248
+ * Returns array of {method, path, description, bodySchema?} objects
105
249
  */
106
- function extractApiPatterns(content, filePath) {
107
- const apiPatterns = [];
250
+ function extractConventionRoutes(content, filePath) {
251
+ const routes = [];
252
+
253
+ // CodeIgniter: application/controllers/api/v2/Settings.php -> /api/v2/settings
254
+ const basePath = extractRouteFromControllerPath(filePath);
255
+ if (!basePath) return routes;
256
+
257
+ // Split content into lines for method body extraction
108
258
  const lines = content.split('\n');
109
259
 
110
- // Patterns that indicate API calls
111
- const patterns = [
112
- // fetch() calls
113
- { regex: /fetch\s*\(\s*[`'"](.*?)[`'"]/g, type: 'fetch' },
114
- { regex: /fetch\s*\(\s*`([^`]*)`/g, type: 'fetch-template' },
115
- { regex: /fetch\s*\(\s*(['"])?\/api\//g, type: 'fetch-api' },
260
+ // Extract CodeIgniter REST controller methods: index_get, index_post, items_get, etc.
261
+ const methodRegex = /public\s+function\s+(\w+)_(get|post|put|patch|delete)\s*\(([^)]*)\)/gi;
262
+ let match;
263
+
264
+ while ((match = methodRegex.exec(content)) !== null) {
265
+ const action = match[1]; // e.g., 'index', 'items', 'user'
266
+ const httpMethod = match[2].toUpperCase(); // e.g., 'GET', 'POST'
267
+ const params = match[3]; // e.g., '$id = null'
268
+ const matchIndex = match.index;
116
269
 
117
- // axios calls
118
- { regex: /axios\.(get|post|put|patch|delete)\s*\(\s*[`'"](.*?)[`'"]/g, type: 'axios' },
119
- { regex: /axios\s*\(\s*\{[^}]*url\s*:\s*[`'"](.*?)[`'"]/g, type: 'axios-config' },
270
+ // Build the route path
271
+ let path = basePath;
272
+ if (action !== 'index') {
273
+ path += '/' + action.toLowerCase().replace(/_/g, '/');
274
+ }
120
275
 
121
- // jQuery ajax
122
- { regex: /\$\.(ajax|get|post)\s*\(\s*[`'"](.*?)[`'"]/g, type: 'jquery' },
276
+ // Add path parameters from function arguments
277
+ if (params) {
278
+ const paramMatches = params.match(/\$(\w+)(?:\s*=\s*[^,)]+)?/g);
279
+ if (paramMatches) {
280
+ paramMatches.forEach(p => {
281
+ const paramName = p.match(/\$(\w+)/)[1];
282
+ // Only add as path param if it's not optional (no default value) or looks like an ID
283
+ if (!p.includes('=') || paramName.toLowerCase().includes('id')) {
284
+ path += '/:' + paramName;
285
+ }
286
+ });
287
+ }
288
+ }
289
+
290
+ // Extract method body to analyze for body schema (for POST/PUT/PATCH)
291
+ let bodySchema = null;
292
+ if (['POST', 'PUT', 'PATCH'].includes(httpMethod)) {
293
+ bodySchema = extractBodySchemaFromMethod(content, matchIndex);
294
+ }
295
+
296
+ const route = {
297
+ method: httpMethod,
298
+ path,
299
+ description: `${action}_${match[2]}() method in ${filePath.split('/').pop()}`
300
+ };
301
+
302
+ if (bodySchema && Object.keys(bodySchema).length > 0) {
303
+ route.bodySchema = bodySchema;
304
+ }
305
+
306
+ routes.push(route);
307
+ }
308
+
309
+ return routes;
310
+ }
311
+
312
+ // Global cache for discovered complex structures (shared across file analysis)
313
+ const discoveredStructures = new Map();
314
+
315
+ /**
316
+ * Pre-scan a file to discover JSON structure patterns
317
+ * Looks for json_decode followed by array access patterns
318
+ */
319
+ function preDiscoverStructures(content) {
320
+ // Find patterns like: $actions = json_decode($actions, true);
321
+ const jsonDecodeRegex = /\$(\w+)\s*=\s*json_decode\s*\(\s*\$\1/gi;
322
+ let match;
323
+
324
+ while ((match = jsonDecodeRegex.exec(content)) !== null) {
325
+ const varName = match[1];
123
326
 
124
- // Generic API URLs
125
- { regex: /['"`](\/api\/[^'"`\s]+)['"`]/g, type: 'api-url' },
126
- { regex: /['"`](https?:\/\/[^'"`\s]*\/api[^'"`\s]*)['"`]/g, type: 'full-url' },
327
+ // Extract the structure used for this variable
328
+ const structure = extractNestedStructure(content, varName);
127
329
 
128
- // Method + URL patterns
129
- { regex: /(GET|POST|PUT|PATCH|DELETE)\s*[,:]?\s*['"`](\/[^'"`]+)['"`]/gi, type: 'method-url' },
330
+ // If we found a meaningful structure with examples, cache it
331
+ if (structure._example && Object.keys(structure._example).length > 0) {
332
+ // Determine field name from common patterns
333
+ // e.g., $actions = $this->post('actions'); $actions = json_decode($actions);
334
+ const fieldRegex = new RegExp(
335
+ `\\$${varName}\\s*=\\s*\\$this\\s*->\\s*(?:post|put|patch|get)\\s*\\(\\s*['"]([\\w]+)['"]`,
336
+ 'i'
337
+ );
338
+ const fieldMatch = content.match(fieldRegex);
339
+ if (fieldMatch) {
340
+ const fieldName = fieldMatch[1];
341
+ discoveredStructures.set(fieldName, structure);
342
+ } else {
343
+ // Use variable name as field name
344
+ discoveredStructures.set(varName, structure);
345
+ }
346
+ }
347
+ }
348
+
349
+ // Also look for library/model calls that reveal structure
350
+ // e.g., ->edit($update["settings"]), ->edit($update["integrations"])
351
+ const modelEditRegex = /(\w+_model|lib_\w+)\s*->\s*edit\s*\(\s*\$(\w+)\s*\[\s*["'](\w+)["']\s*\]/gi;
352
+ const discoveredSections = new Set();
353
+
354
+ while ((match = modelEditRegex.exec(content)) !== null) {
355
+ const section = match[3]; // e.g., 'settings', 'integrations', 'assignments'
356
+ discoveredSections.add(section);
357
+ }
358
+
359
+ // Store discovered sections globally so they can be applied later
360
+ if (discoveredSections.size > 0) {
361
+ const existingSections = discoveredStructures.get('_sections') || new Set();
362
+ for (const section of discoveredSections) {
363
+ existingSections.add(section);
364
+ }
365
+ discoveredStructures.set('_sections', existingSections);
366
+ }
367
+
368
+ // Also look for common field names with library calls
369
+ // e.g., $this->lib_settings->edit($actions) suggests 'actions' has a structure
370
+ const libCallRegex = /lib_settings\s*->\s*edit\s*\(\s*\$(\w+)/gi;
371
+ while ((match = libCallRegex.exec(content)) !== null) {
372
+ const varName = match[1];
373
+ // If we already discovered structure for 'actions', this confirms it
374
+ if (!discoveredStructures.has(varName)) {
375
+ // Mark it as a known complex field even if we don't have the structure
376
+ const existing = discoveredStructures.get('actions');
377
+ if (existing) {
378
+ discoveredStructures.set(varName, existing);
379
+ }
380
+ }
381
+ }
382
+ }
383
+
384
/**
 * Resolve the schema of a request field that is assigned to a PHP variable and
 * treated as structured JSON (it is json_decode'd, or its name is a known
 * JSON-carrying field).
 *
 * A structure that comes with a generated `_example` is stored in the shared
 * `discoveredStructures` cache under the field name; when the local analysis
 * yields nothing (or no example), a previously cached structure is used.
 *
 * @param {string} analysisContent - Source text scanned for array accesses.
 * @param {string} varName - PHP variable name (without the `$`).
 * @param {string} fieldName - Request field name the variable was read from.
 * @returns {object|string} The nested structure, or a placeholder description.
 */
function resolveStructuredFieldSchema(analysisContent, varName, fieldName) {
  let nestedSchema = extractNestedStructure(analysisContent, varName);
  const foundKeys = Object.keys(nestedSchema).length > 0;
  const foundExample = Boolean(nestedSchema._example);

  if (foundKeys && foundExample) {
    // Only complete structures (with an example) are worth sharing.
    discoveredStructures.set(fieldName, nestedSchema);
  } else {
    const cachedStructure = discoveredStructures.get(fieldName);
    if (cachedStructure) {
      nestedSchema = cachedStructure;
    }
  }

  return Object.keys(nestedSchema).length > 0
    ? nestedSchema
    : 'object (JSON structure)';
}

/**
 * Resolve the schema of a request field that is read without a tracked
 * variable: a cached structure (or placeholder) for JSON-like field names,
 * otherwise a type inferred from the field name.
 *
 * @param {string} fieldName - Request field name.
 * @param {string} methodBody - Text of the method being analysed.
 * @returns {object|string} Cached structure, placeholder, or inferred type.
 */
function resolveLooseFieldSchema(fieldName, methodBody) {
  if (isLikelyJsonField(fieldName)) {
    return discoveredStructures.get(fieldName) || 'object (JSON structure)';
  }
  return inferFieldType(fieldName, methodBody);
}

/**
 * Extract a body schema from a PHP controller method.
 *
 * Recognised input reads: $this->post('field') (also put/patch/get),
 * $this->input->post('field'), $_POST['field'] (also GET/REQUEST/PUT) and
 * $request->input('field') (also get/post/all). Fields assigned to a variable
 * that is json_decode'd, or whose name looks like a JSON payload, are expanded
 * through nested array-access analysis.
 *
 * @param {string} content - Source text containing the method.
 * @param {number} methodStartIndex - Index in `content` where the method starts.
 * @param {?string} [fullFileContent=null] - Optional wider text used for the
 *   nested-structure analysis; falls back to `content`.
 * @returns {Object<string, (string|object)>} Map of field name to inferred
 *   type or nested structure; empty when no input reads are found.
 */
function extractBodySchemaFromMethod(content, methodStartIndex, fullFileContent = null) {
  const bodyFields = {};
  // Own-property check: a truthiness test on bodyFields[name] is fooled by
  // inherited keys such as "constructor" or "toString" and would drop them.
  const hasField = (name) => Object.prototype.hasOwnProperty.call(bodyFields, name);

  // The method body runs from its start to the next method declaration,
  // capped at 8000 characters.
  const methodContent = content.slice(methodStartIndex, methodStartIndex + 8000);
  const endMatch = methodContent.match(/\n\s*(?:public|private|protected)\s+function\s+/);
  const methodBody = endMatch ? methodContent.slice(0, endMatch.index) : methodContent;

  const analysisContent = fullFileContent || content;
  let match;

  // Pass 1: reads assigned to a variable, e.g. $x = $this->post('field').
  // These always (re)write the field because the variable enables deeper analysis.
  const assignedReadRegex = /\$(\w+)\s*=\s*\$this\s*->\s*(post|put|patch|get)\s*\(\s*['"](\w+)['"]/gi;
  while ((match = assignedReadRegex.exec(methodBody)) !== null) {
    const varName = match[1];
    const fieldName = match[3];

    const jsonDecodeCheck = new RegExp(`\\$${varName}\\s*=\\s*json_decode\\s*\\(\\s*\\$${varName}`, 'i');
    const isStructured = jsonDecodeCheck.test(methodBody) || isLikelyJsonField(fieldName);

    bodyFields[fieldName] = isStructured
      ? resolveStructuredFieldSchema(analysisContent, varName, fieldName)
      : inferFieldType(fieldName, methodBody);
  }

  // Pass 2: reads without a tracked variable. Order matters: the first pattern
  // that sees a field decides its schema; later patterns never overwrite it.
  const looseReadPatterns = [
    // $this->post('field') without assignment
    { regex: /\$this\s*->\s*(post|put|patch|get)\s*\(\s*['"](\w+)['"]/gi, jsonAware: true },
    // CodeIgniter 3: $this->input->post('field')
    { regex: /\$this\s*->\s*input\s*->\s*(post|get|put)\s*\(\s*['"](\w+)['"]/gi, jsonAware: true },
    // PHP superglobals: $_POST['field'], $_GET['field'], $_REQUEST['field']
    { regex: /\$_(POST|GET|REQUEST|PUT)\s*\[\s*['"](\w+)['"]\s*\]/gi, jsonAware: false },
    // Laravel/PHP: $request->input('field'), $request->get('field')
    { regex: /\$request\s*->\s*(input|get|post|all)\s*\(\s*['"](\w+)['"]/gi, jsonAware: true },
  ];

  for (const { regex, jsonAware } of looseReadPatterns) {
    while ((match = regex.exec(methodBody)) !== null) {
      const fieldName = match[2];
      if (hasField(fieldName)) continue;

      bodyFields[fieldName] = jsonAware
        ? resolveLooseFieldSchema(fieldName, methodBody)
        : inferFieldType(fieldName, methodBody);
    }
  }

  return bodyFields;
}
501
+
502
// Field names that conventionally carry a JSON/object payload. Built once so
// the list is not re-created on every call and lookups are constant-time.
const LIKELY_JSON_FIELD_NAMES = new Set([
  'actions', 'data', 'body', 'payload', 'params', 'options', 'settings',
  'config', 'meta', 'metadata', 'attributes', 'properties', 'fields',
  'items', 'records', 'entries', 'values', 'content', 'json'
]);

/**
 * Check if a field name likely contains JSON/object data.
 *
 * The comparison is case-insensitive and matches whole names only.
 *
 * @param {string} fieldName - Request field name to classify.
 * @returns {boolean} True when the name is one of the known JSON-carrying
 *   field names; false otherwise, including for non-string input.
 */
function isLikelyJsonField(fieldName) {
  if (typeof fieldName !== 'string') {
    return false;
  }
  return LIKELY_JSON_FIELD_NAMES.has(fieldName.toLowerCase());
}
131
513
 
132
- let hasApiCalls = false;
133
- const extractedBlocks = [];
514
+ /**
515
+ * Extract nested structure from array access patterns
516
+ * e.g., $actions['settings']['updates'] -> { settings: { updates: [] } }
517
+ */
518
+ function extractNestedStructure(methodBody, varName) {
519
+ const structure = {};
520
+ const itemFields = new Set();
134
521
 
135
- // FIRST: Extract API base URL definitions (critical for resolving paths)
136
- const baseUrlDefinitions = [];
137
- for (let i = 0; i < lines.length; i++) {
138
- const line = lines[i];
139
- // Match: const API_BASE = ..., const baseURL = ..., const apiUrl = ..., etc.
140
- if (/^\s*(const|let|var)\s+(API_BASE|API_URL|BASE_URL|baseURL|apiUrl|apiBase|API_ENDPOINT)/i.test(line)) {
141
- baseUrlDefinitions.push(`${i + 1}: ${line}`);
522
+ // Match direct access patterns like $varName['key1']['key2']
523
+ const arrayAccessRegex = new RegExp(
524
+ `\\$${varName}\\s*\\[\\s*['"]([\\w]+)['"]\\s*\\]\\s*\\[\\s*['"]([\\w]+)['"]\\s*\\]`,
525
+ 'gi'
526
+ );
527
+
528
+ let match;
529
+ while ((match = arrayAccessRegex.exec(methodBody)) !== null) {
530
+ const key1 = match[1];
531
+ const key2 = match[2];
532
+
533
+ if (!structure[key1]) structure[key1] = {};
534
+ structure[key1][key2] = []; // Arrays like updates, inserts, deletes
535
+ }
536
+
537
+ // Look for foreach patterns to find nested iterators
538
+ // Pattern: foreach ($varName as $key => &$iterVar) { ... foreach ($iterVar['subkey'] as &$item) }
539
+ const foreachRegex = new RegExp(
540
+ `foreach\\s*\\(\\s*\\$${varName}\\s+as\\s+(?:\\$\\w+\\s*=>\\s*)?[&]?\\$(\\w+)\\s*\\)`,
541
+ 'gi'
542
+ );
543
+
544
+ while ((match = foreachRegex.exec(methodBody)) !== null) {
545
+ const iterVar = match[1];
546
+
547
+ // Find nested foreach on iterator's array property
548
+ // e.g., foreach ($settings['updates'] as &$setting)
549
+ const nestedForeachRegex = new RegExp(
550
+ `foreach\\s*\\(\\s*\\$${iterVar}\\s*\\[\\s*['"]([\\w]+)['"]\\s*\\]\\s+as\\s+(?:\\$\\w+\\s*=>\\s*)?[&]?\\$(\\w+)\\s*\\)`,
551
+ 'gi'
552
+ );
553
+
554
+ let nestedMatch;
555
+ while ((nestedMatch = nestedForeachRegex.exec(methodBody)) !== null) {
556
+ const arrayKey = nestedMatch[1]; // e.g., 'updates'
557
+ const itemVar = nestedMatch[2]; // e.g., 'setting'
558
+
559
+ // Find all field accesses on the item variable
560
+ const itemAccessRegex = new RegExp(`\\$${itemVar}\\s*\\[\\s*['"]([\\w]+)['"]\\s*\\]`, 'gi');
561
+ let itemMatch;
562
+ while ((itemMatch = itemAccessRegex.exec(methodBody)) !== null) {
563
+ itemFields.add(itemMatch[1]);
564
+ }
565
+ }
566
+ }
567
+
568
+ // Also look for direct item field accesses (common variable names)
569
+ const commonItemVars = ['setting', 'item', 'update', 'insert', 'delete', 'row', 'entry', 'data', 'integration', 'assignment'];
570
+ for (const itemVar of commonItemVars) {
571
+ const itemAccessRegex = new RegExp(`\\$${itemVar}\\s*\\[\\s*['"]([\\w]+)['"]\\s*\\]`, 'gi');
572
+ let itemMatch;
573
+ while ((itemMatch = itemAccessRegex.exec(methodBody)) !== null) {
574
+ itemFields.add(itemMatch[1]);
575
+ }
576
+ }
577
+
578
+ // Look for common section names that might be used alongside 'settings'
579
+ // Pattern: $actions['sectionName'] or $actions["sectionName"]
580
+ const sectionRegex = new RegExp(`\\$${varName}\\s*\\[\\s*['"]([\\w]+)['"]\\s*\\]`, 'gi');
581
+ const sections = new Set();
582
+ let sectionMatch;
583
+ while ((sectionMatch = sectionRegex.exec(methodBody)) !== null) {
584
+ sections.add(sectionMatch[1]);
585
+ }
586
+
587
+ // Common section names in settings/actions payloads
588
+ const commonSections = ['settings', 'assignments', 'integrations', 'notifications', 'permissions'];
589
+ for (const section of commonSections) {
590
+ // Check if this section is referenced anywhere in the codebase
591
+ if (methodBody.includes(`['${section}']`) || methodBody.includes(`["${section}"]`)) {
592
+ sections.add(section);
593
+ }
594
+ }
595
+
596
+ // Add sections discovered during pre-scan phase
597
+ const globalSections = discoveredStructures.get('_sections');
598
+ if (globalSections) {
599
+ for (const section of globalSections) {
600
+ // Filter out false positives (id, id_user, etc. are not sections)
601
+ if (!section.startsWith('id') && section.length > 2) {
602
+ sections.add(section);
603
+ }
604
+ }
605
+ }
606
+
607
+ // Build item schema from collected fields
608
+ const itemSchema = {};
609
+ for (const field of itemFields) {
610
+ itemSchema[field] = inferFieldType(field, methodBody);
611
+ }
612
+
613
+ // Ensure common fields are included
614
+ const commonFields = ['key', 'value', 'id_facility', 'type', 'category', 'id_file', 'integrator'];
615
+ for (const field of commonFields) {
616
+ if (!itemSchema[field]) {
617
+ // Check if field is referenced in content
618
+ if (methodBody.includes(`['${field}']`) || methodBody.includes(`["${field}"]`)) {
619
+ itemSchema[field] = inferFieldType(field, methodBody);
620
+ }
621
+ }
622
+ }
623
+
624
+ // Build structure for all discovered sections
625
+ if (sections.size > 0 && Object.keys(itemSchema).length > 0) {
626
+ for (const section of sections) {
627
+ if (!structure[section]) {
628
+ structure[section] = {};
629
+ }
630
+ // Add updates/inserts/deletes for each section
631
+ for (const action of ['updates', 'inserts', 'deletes']) {
632
+ structure[section][action] = `array of { ${Object.entries(itemSchema).map(([k, v]) => `${k}: ${v}`).join(', ')} }`;
633
+ }
142
634
  }
143
- // Also match: axios.defaults.baseURL = ...
144
- if (/baseURL\s*[:=]/i.test(line)) {
145
- baseUrlDefinitions.push(`${i + 1}: ${line}`);
635
+ } else if (Object.keys(itemSchema).length > 0) {
636
+ // Apply item schema to existing structure
637
+ for (const [key, value] of Object.entries(structure)) {
638
+ if (typeof value === 'object' && !key.startsWith('_')) {
639
+ for (const [subKey, subValue] of Object.entries(value)) {
640
+ if (Array.isArray(subValue) || ['updates', 'inserts', 'deletes', 'items'].includes(subKey)) {
641
+ structure[key][subKey] = `array of { ${Object.entries(itemSchema).map(([k, v]) => `${k}: ${v}`).join(', ')} }`;
642
+ }
643
+ }
644
+ }
146
645
  }
147
646
  }
148
647
 
149
- if (baseUrlDefinitions.length > 0) {
150
- extractedBlocks.push('// API BASE URL DEFINITIONS (use these to resolve full paths):\n' + baseUrlDefinitions.join('\n'));
151
- hasApiCalls = true;
648
+ // Generate a complete example
649
+ if (Object.keys(structure).length > 0 && Object.keys(itemSchema).length > 0) {
650
+ const exampleItem = {};
651
+ for (const [field, type] of Object.entries(itemSchema)) {
652
+ exampleItem[field] = getExampleValue(field, type);
653
+ }
654
+
655
+ // Example for integration item (with integrator field)
656
+ const integrationExampleItem = { ...exampleItem, integrator: 'mews' };
657
+
658
+ structure._example = {};
659
+ for (const [key, value] of Object.entries(structure)) {
660
+ if (key.startsWith('_')) continue;
661
+ structure._example[key] = {};
662
+ for (const [subKey] of Object.entries(value)) {
663
+ if (['updates', 'inserts', 'deletes'].includes(subKey)) {
664
+ if (key === 'integrations') {
665
+ structure._example[key][subKey] = subKey === 'updates' ? [integrationExampleItem] : [];
666
+ } else {
667
+ structure._example[key][subKey] = subKey === 'updates' ? [exampleItem] : [];
668
+ }
669
+ }
670
+ }
671
+ }
152
672
  }
673
+
674
+ return structure;
675
+ }
153
676
 
154
- // Line-by-line extraction with context
677
/**
 * Generate an example JSON body based on an extracted structure.
 *
 * Keys starting with an underscore are treated as metadata and skipped.
 * Array values are kept as arrays, objects carrying an `_arrayOf` item schema
 * become a one-element array of example items, other objects are expanded
 * recursively, and everything else is delegated to getExampleValue().
 *
 * @param {object} structure - Extracted structure (field name -> type string,
 *   nested object, or array).
 * @param {*} itemFields - Not used by this function itself; only forwarded to
 *   recursive calls. Kept for interface compatibility.
 * @returns {object} Example object mirroring the shape of `structure`.
 */
function generateExample(structure, itemFields) {
  const example = {};

  for (const [key, value] of Object.entries(structure)) {
    if (key.startsWith('_')) continue;

    if (Array.isArray(value)) {
      // Arrays are objects too; without this guard they would be recursed
      // into and come back as a plain object instead of an array.
      example[key] = [...value];
      continue;
    }

    if (typeof value === 'object' && value !== null) {
      if (value._arrayOf) {
        // Array of items described by a field -> type map
        const itemExample = {};
        for (const [itemKey, itemType] of Object.entries(value._arrayOf)) {
          itemExample[itemKey] = getExampleValue(itemKey, itemType);
        }
        example[key] = [itemExample];
      } else {
        // Nested object
        example[key] = generateExample(value, itemFields);
      }
      continue;
    }

    example[key] = getExampleValue(key, value);
  }

  return example;
}
705
+
706
/**
 * Get an example value for a field based on its name and type.
 *
 * Name-based rules are checked first (key, value, id_facility, id_file, type,
 * category, id, anything containing "email"); the type string is consulted
 * afterwards. Besides integer/number/boolean/array, the type strings produced
 * elsewhere in this file ('datetime', 'email', 'array|object' and
 * 'array of { ... }') are recognised so they no longer degrade to the generic
 * string placeholder.
 *
 * @param {string} fieldName - Field name; compared case-insensitively.
 * @param {*} fieldType - Type description, usually a string such as 'integer'.
 * @returns {string|number|boolean|Array} Example value for the field.
 */
function getExampleValue(fieldName, fieldType) {
  const nameLower = String(fieldName).toLowerCase();

  // Name-driven examples take precedence over the declared type.
  if (nameLower === 'key') return 'hotel_title';
  if (nameLower === 'value') return 'My Hotel Name';
  if (nameLower === 'id_facility' || nameLower === 'id_file') return '-1';
  if (nameLower === 'type') return 'text';
  if (nameLower === 'category') return 'main';
  if (nameLower === 'id') return 1;
  if (nameLower.includes('email')) return 'user@example.com';

  // Type-driven examples.
  if (fieldType === 'integer' || fieldType === 'number') return 1;
  if (fieldType === 'boolean') return true;
  if (fieldType === 'email') return 'user@example.com';
  if (fieldType === 'datetime') return '2024-01-01T00:00:00Z';
  // Covers 'array', 'array|object' and 'array of { ... }' descriptions.
  if (typeof fieldType === 'string' && fieldType.startsWith('array')) return [];

  return 'string_value';
}
725
+
726
/**
 * Infer a field type from its name.
 *
 * Rules are evaluated in order and the first match wins:
 * integer (id fields) -> boolean -> datetime -> number -> email ->
 * array|object -> string (default).
 *
 * @param {string} fieldName - Field name; compared case-insensitively.
 * @param {string} [context] - Surrounding source text. Accepted for interface
 *   compatibility; the current heuristics do not read it.
 * @returns {string} One of 'integer', 'boolean', 'datetime', 'number',
 *   'email', 'array|object' or 'string'.
 */
function inferFieldType(fieldName, context) {
  const nameLower = fieldName.toLowerCase();

  // ID fields
  if (nameLower.startsWith('id_') || nameLower.endsWith('_id') || nameLower === 'id') {
    return 'integer';
  }

  // Boolean-ish fields
  if (nameLower.startsWith('is_') || nameLower.startsWith('has_') ||
      nameLower.includes('enabled') || nameLower.includes('active') ||
      nameLower.includes('flag') || nameLower === 'status') {
    return 'boolean';
  }

  // Date/time fields. "_at" must be a whole name segment (created_at,
  // deleted_at_utc); a plain substring test would also catch names such as
  // user_attributes or file_attachment.
  if (nameLower.includes('date') || nameLower.includes('time') ||
      /_at(?:_|$)/.test(nameLower) || nameLower === 'created' || nameLower === 'updated') {
    return 'datetime';
  }

  // Numeric fields
  if (nameLower.includes('count') || nameLower.includes('amount') ||
      nameLower.includes('price') || nameLower.includes('quantity') ||
      nameLower.includes('total') || nameLower.includes('number')) {
    return 'number';
  }

  // Email
  if (nameLower.includes('email')) {
    return 'email';
  }

  // Array/JSON fields
  if (nameLower.includes('items') || nameLower.includes('list') ||
      nameLower.includes('array') || nameLower === 'data' || nameLower === 'actions') {
    return 'array|object';
  }

  // Default to string
  return 'string';
}
771
+
772
/**
 * Split a list of files into three priority buckets.
 *
 * Each file's path relative to `basePath` is tested against
 * HIGH_PRIORITY_PATTERNS first and MEDIUM_PRIORITY_PATTERNS second; a file
 * that matches neither lands in `other`. The original (non-relative) file
 * value is what gets stored in the bucket.
 *
 * @param {Iterable<string>} files - File paths to classify.
 * @param {string} basePath - Directory the relative paths are computed from.
 * @returns {{highPriority: string[], mediumPriority: string[], other: string[]}}
 */
function categorizeFiles(files, basePath) {
  const buckets = {
    highPriority: [],
    mediumPriority: [],
    other: [],
  };

  const matchesAny = (patternList, candidate) =>
    patternList.some((pattern) => pattern.test(candidate));

  for (const file of files) {
    const relativePath = path.relative(basePath, file);

    if (matchesAny(HIGH_PRIORITY_PATTERNS, relativePath)) {
      buckets.highPriority.push(file);
      continue;
    }

    if (matchesAny(MEDIUM_PRIORITY_PATTERNS, relativePath)) {
      buckets.mediumPriority.push(file);
      continue;
    }

    buckets.other.push(file);
  }

  return buckets;
}
794
+
795
/**
 * Collect JSDoc comment blocks that carry Swagger/OpenAPI documentation.
 *
 * A block qualifies when it starts with the JSDoc opener and contains an
 * `@swagger` or `@openapi` tag before its closing delimiter. The part of the
 * pattern between the opener and the tag is not allowed to cross a comment
 * terminator or another JSDoc opener, so a match can never begin in an
 * earlier, unrelated comment and swallow the code in between.
 *
 * @param {string} content - Full source text of a file.
 * @returns {string[]} Matching comment blocks, in file order (empty if none).
 */
function extractSwaggerBlocks(content) {
  if (typeof content !== 'string' || content.length === 0) {
    return [];
  }

  // opener, then any chars that do not start a terminator or a new opener,
  // then the tag, then everything up to the first terminator.
  const swaggerRegex = /\/\*\*(?:(?!\*\/|\/\*\*)[\s\S])*?@(?:swagger|openapi)\b[\s\S]*?\*\//g;

  // With the g flag, match() returns every full match (or null when none).
  return content.match(swaggerRegex) || [];
}
811
+
812
/**
 * Extract TypeScript `interface` / `type` declarations that look like
 * request/response schemas (DTOs).
 *
 * Matching rules:
 *   - In any file: declarations whose name contains Request, Response, Dto,
 *     Body, Query, Params, Payload, Input or Output.
 *   - Only in files whose path contains dto/interface/type/schema/model:
 *     every interface or type declaration.
 *
 * For each hit the declaration is captured from its first line until braces
 * balance again, capped at 50 lines. A declaration that never opens a brace
 * (e.g. `type Id = string | number;`) ends at the first line terminated by
 * `;` or before the first blank line, instead of dragging in the next 50
 * lines of unrelated code.
 *
 * @param {string} content - Full source text of a file.
 * @param {string} filePath - File path, used only to decide whether the file
 *   looks like a DTO/types file.
 * @returns {string[]} One entry per declaration; each line is prefixed with
 *   its 1-based line number ("12: interface Foo {").
 */
function extractTypeDefinitions(content, filePath) {
  const typeBlocks = [];
  if (typeof content !== 'string') {
    return typeBlocks;
  }

  const MAX_BLOCK_LINES = 50;
  const lines = content.split('\n');

  // Names that suggest a request/response schema - honoured in every file.
  const namedSchemaPatterns = [
    /interface\s+\w*(Request|Response|Dto|Body|Query|Params|Payload|Input|Output)\w*\s*\{/i,
    /type\s+\w*(Request|Response|Dto|Body|Query|Params|Payload|Input|Output)\w*\s*=/i,
  ];

  // Any declaration at all - honoured only in DTO/schema-looking files.
  const anyDeclarationPatterns = [
    /interface\s+\w+\s*\{/,
    /type\s+\w+\s*=/,
    /^(export\s+)?(interface|type)\s+/,
  ];

  const isDtoFile = /dto|interface|type|schema|model/i.test(filePath);

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];

    const isTypeDefinition =
      namedSchemaPatterns.some((regex) => regex.test(line)) ||
      (isDtoFile && anyDeclarationPatterns.some((regex) => regex.test(line)));

    if (!isTypeDefinition) {
      continue;
    }

    // Walk forward until the braces opened by the declaration are closed.
    let braceCount = 0;
    let started = false;
    let endLine = i;

    for (let j = i; j < lines.length && j < i + MAX_BLOCK_LINES; j++) {
      const current = lines[j];
      const trimmed = current.trim();

      // Brace-less declaration followed by a blank line: stop before it.
      if (!started && j > i && trimmed === '') {
        break;
      }

      for (const char of current) {
        if (char === '{') {
          braceCount++;
          started = true;
        } else if (char === '}') {
          braceCount--;
        }
      }

      endLine = j;

      if (started && braceCount === 0) {
        break;
      }
      // Brace-less alias such as `type Id = string | number;`
      if (!started && trimmed.endsWith(';')) {
        break;
      }
    }

    const block = lines
      .slice(i, endLine + 1)
      .map((l, idx) => `${i + idx + 1}: ${l}`)
      .join('\n');

    typeBlocks.push(block);
    i = endLine; // Skip processed lines
  }

  return typeBlocks;
}
866
+
867
/**
 * Extract snippets that show how a handler reads its request data.
 *
 * Three kinds of lines are detected, each captured with its own amount of
 * surrounding context and prefixed with a label comment:
 *   - `req.body` / `req.query` / `req.params` access (2 lines before, 5 after)
 *   - NestJS `@Body()`, `@Query()`, `@Param()`, `@Headers()` (1 before, 3 after)
 *   - Joi / Zod / class-validator schemas (2 before, 10 after)
 *
 * Every window is anchored on the line that actually matched. (Previously a
 * line matching more than one detector had its later windows computed from an
 * already-advanced index, so they pointed at the wrong lines.) After a match,
 * scanning resumes past the furthest captured line to avoid duplicates.
 *
 * @param {string} content - Full source text of a file.
 * @returns {string[]} Labelled snippets; each source line is prefixed with
 *   its 1-based line number.
 */
function extractRequestPatterns(content) {
  const patterns = [];
  if (typeof content !== 'string') {
    return patterns;
  }

  const lines = content.split('\n');

  const detectors = [
    {
      label: '// Request parameter extraction:',
      before: 2,
      after: 5,
      matches: (line) =>
        /(?:const|let|var)\s*\{[^}]+\}\s*=\s*req\.(body|query|params)/i.test(line) ||
        /req\.(body|query|params)\s*[;.]/.test(line),
    },
    {
      label: '// NestJS parameter decorator:',
      before: 1,
      after: 3,
      matches: (line) => /@(Body|Query|Param|Headers)\s*\(/i.test(line),
    },
    {
      label: '// Validation schema:',
      before: 2,
      after: 10,
      matches: (line) =>
        /Joi\.(object|string|number|array|boolean)\s*\(/i.test(line) ||
        /z\.(object|string|number|array|boolean)\s*\(/i.test(line) ||
        /@(IsString|IsNumber|IsArray|IsBoolean|IsOptional|ValidateNested)/i.test(line),
    },
  ];

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    let furthestEnd = -1;

    for (const detector of detectors) {
      if (!detector.matches(line)) {
        continue;
      }

      // Always measure the window from the matching line `i`.
      const startLine = Math.max(0, i - detector.before);
      const endLine = Math.min(lines.length - 1, i + detector.after);

      const block = lines
        .slice(startLine, endLine + 1)
        .map((l, idx) => `${startLine + idx + 1}: ${l}`)
        .join('\n');

      patterns.push(`${detector.label}\n${block}`);
      furthestEnd = Math.max(furthestEnd, endLine);
    }

    if (furthestEnd >= 0) {
      i = furthestEnd; // Skip lines already captured
    }
  }

  return patterns;
}
190
925
 
191
- // If we found API patterns, return extracted content
192
- if (hasApiCalls && extractedBlocks.length > 0) {
193
- return {
194
- hasApiCalls: true,
195
- content: `// File: ${filePath}\n// API patterns found:\n\n${extractedBlocks.join('\n\n// ---\n\n')}`
196
- };
926
/**
 * Derive routes from CodeIgniter REST-style controller methods named
 * `{action}_{httpverb}`.
 *
 *   index_get()            -> GET  {basePath}
 *   items_get()            -> GET  {basePath}/items
 *   user_get($id)          -> GET  {basePath}/user/:id
 *   user_posts_get($id)    -> GET  {basePath}/user/posts/:id
 *   items_get($page = 1)   -> GET  {basePath}/items      (optional arg skipped)
 *
 * Only required arguments become `:param` path segments. The first argument
 * with a default value, and everything after it, is treated as optional and
 * left out. (The previous optional check inspected only the bare `$name`
 * token, which never contains `=`, so optional arguments were always emitted.)
 *
 * @param {string} content - PHP source of the controller.
 * @param {string} basePath - Route prefix for the controller (e.g. "/api/v2/settings").
 * @returns {string[]} One comment-style line per method:
 *   "// GET /path - from action_get()".
 */
function extractCodeIgniterMethods(content, basePath) {
  const routes = [];
  if (typeof content !== 'string') {
    return routes;
  }

  // Match: public function methodname_httpverb($params)
  const methodRegex = /public\s+function\s+(\w+)_(get|post|put|patch|delete)\s*\(([^)]*)\)/gi;
  let match;

  while ((match = methodRegex.exec(content)) !== null) {
    const [, action, verb, params] = match;
    const method = verb.toUpperCase();

    // `index` maps to the controller root; underscores become path separators.
    let routePath = basePath;
    if (action !== 'index') {
      routePath += '/' + action.toLowerCase().replace(/_/g, '/');
    }

    for (const param of params.split(',')) {
      // Stop at the first defaulted argument: it and all later ones are optional.
      if (param.includes('=')) {
        break;
      }
      const nameMatch = param.match(/\$(\w+)/);
      if (nameMatch) {
        routePath += '/:' + nameMatch[1];
      }
    }

    routes.push(`// ${method} ${routePath} - from ${action}_${verb}()`);
  }

  return routes;
}
198
968
 
199
- // Fallback: include first 5000 chars if file looks relevant
200
- if (/api|fetch|axios|service|http/i.test(filePath)) {
201
- return {
202
- hasApiCalls: true,
203
- content: `// File: ${filePath}\n${content.slice(0, 5000)}`
204
- };
969
/**
 * Derive a route prefix from a controller's file path, following the
 * directory conventions of CodeIgniter and Laravel.
 *
 *   CodeIgniter 3: application/controllers/api/v2/Settings.php     -> /api/v2/settings
 *   CodeIgniter 4: app/Controllers/Api/V2/Settings.php             -> /api/v2/settings
 *   Laravel:       app/Http/Controllers/Api/V2/SettingsController.php -> /api/v2/settings
 *
 * The matched portion is lower-cased and a trailing "/index" is dropped.
 * Backslash separators are normalised first so Windows-style relative paths
 * match the same conventions.
 *
 * @param {string} filePath - Path of the controller file.
 * @returns {string|null} The derived route, or null when the path follows
 *   none of the conventions.
 */
function extractRouteFromControllerPath(filePath) {
  if (typeof filePath !== 'string') {
    return null;
  }

  const normalizedPath = filePath.replace(/\\/g, '/');

  // Tried in order; the first convention that matches wins.
  const conventions = [
    /application\/controllers\/(.+)\.php$/i, // CodeIgniter 3
    /app\/Controllers\/(.+)\.php$/i, // CodeIgniter 4
    /app\/Http\/Controllers\/(.+)Controller\.php$/i, // Laravel
  ];

  for (const convention of conventions) {
    const match = normalizedPath.match(convention);
    if (match) {
      return '/' + match[1].toLowerCase().replace(/\/index$/, '');
    }
  }

  return null;
}
209
997
 
210
998
  /**
211
- * Prioritize frontend files based on keywords
999
+ * Extract API patterns from file content - framework agnostic
212
1000
  */
213
- function prioritizeFiles(files, basePath) {
214
- return files.sort((a, b) => {
215
- const aPath = path.relative(basePath, a).toLowerCase();
216
- const bPath = path.relative(basePath, b).toLowerCase();
1001
/**
 * Extract API-related content from a single source file, independent of
 * language or framework.
 *
 * Steps, in order:
 *   1. Convention-based routes derived from the controller's path and
 *      CodeIgniter-style method names.
 *   2. Swagger/OpenAPI comment blocks, TypeScript type definitions and
 *      request-parameter snippets (via the sibling extract* helpers).
 *   3. For files matching HIGH_PRIORITY_PATTERNS, or DTO-like files that
 *      yielded type blocks: return the (truncated) full file content.
 *   4. Otherwise: base-URL lines from the first 50 lines plus a line-by-line
 *      scan that captures context around every line matching a route, HTTP
 *      call or URL pattern, or an API keyword.
 *
 * The convention-based route summary from step 1 is also included in the
 * full-content results of step 3, so it is not lost on those early returns.
 *
 * @param {string} content - Full source text of the file.
 * @param {string} filePath - Path used for pattern matching and for the
 *   "// File:" header of the result.
 * @param {string} [absolutePath] - Accepted for call-site compatibility; not used.
 * @returns {{hasApiPatterns: boolean, content: string}} Extracted text, or
 *   `{ hasApiPatterns: false, content: '' }` when nothing relevant was found.
 */
function extractApiPatterns(content, filePath, absolutePath) {
  if (typeof content !== 'string') {
    return { hasApiPatterns: false, content: '' };
  }

  const lines = content.split('\n');
  let hasApiPatterns = false;
  const extractedBlocks = [];

  // Check if this is a route/controller file - if so, include more context
  const isRouteFile = HIGH_PRIORITY_PATTERNS.some(p => p.test(filePath));
  // DTO/Model/Entity files across languages
  const isDtoFile = /dto|interface|types?|schema|model|entity|request|response|form/i.test(filePath);

  // For CodeIgniter/Laravel controllers, extract the route from file path
  let conventionBlock = '';
  const conventionRoute = extractRouteFromControllerPath(filePath);
  if (conventionRoute) {
    // Extract HTTP methods from CodeIgniter-style method names
    const ciMethods = extractCodeIgniterMethods(content, conventionRoute);
    conventionBlock = ciMethods.length > 0
      ? `// CONVENTION-BASED ROUTES FROM CONTROLLER:\n${ciMethods.join('\n')}`
      : `// CONVENTION-BASED ROUTE: ${conventionRoute}\n// Controller file path maps to this API endpoint`;
    extractedBlocks.push(conventionBlock);
    hasApiPatterns = true;
  }

  // Build a "whole file" result, truncated to maxLines, keeping the
  // convention-based route summary (if any) ahead of the file content.
  const fullContentResult = (description, maxLines) => {
    const truncatedContent = lines.length > maxLines
      ? lines.slice(0, maxLines).join('\n') + `\n// ... ${lines.length - maxLines} more lines ...`
      : content;
    const conventionHeader = conventionBlock ? `${conventionBlock}\n` : '';

    return {
      hasApiPatterns: true,
      content: `// File: ${filePath}\n${conventionHeader}// ${description} - full content:\n\n${truncatedContent}`
    };
  };

  // ============================================
  // PATTERN 0: Swagger/OpenAPI documentation
  // ============================================
  const swaggerBlocks = extractSwaggerBlocks(content);
  if (swaggerBlocks.length > 0) {
    hasApiPatterns = true;
    extractedBlocks.push(`// Swagger/OpenAPI documentation found:\n${swaggerBlocks.join('\n\n')}`);
  }

  // ============================================
  // PATTERN 1: TypeScript interfaces/types (DTOs, schemas)
  // ============================================
  const typeBlocks = extractTypeDefinitions(content, filePath);
  if (typeBlocks.length > 0) {
    hasApiPatterns = true;
    extractedBlocks.push(`// TypeScript type definitions:\n${typeBlocks.join('\n\n')}`);
  }

  // ============================================
  // PATTERN 2: Request body/query extraction
  // ============================================
  const requestPatterns = extractRequestPatterns(content);
  if (requestPatterns.length > 0) {
    hasApiPatterns = true;
    extractedBlocks.push(requestPatterns.join('\n\n'));
  }

  // ============================================
  // PATTERN 3: Express/Koa/Fastify Router definitions (JavaScript/TypeScript)
  // ============================================
  const routerPatterns = [
    // Express Router: router.get('/path', handler)
    /\.(get|post|put|patch|delete|all|use)\s*\(\s*['"`]([^'"`]+)['"`]/gi,
    // Express app: app.get('/path', handler)
    /app\.(get|post|put|patch|delete|all|use)\s*\(\s*['"`]([^'"`]+)['"`]/gi,
    // Fastify: fastify.get('/path', handler)
    /fastify\.(get|post|put|patch|delete|all)\s*\(\s*['"`]([^'"`]+)['"`]/gi,
  ];

  // ============================================
  // PATTERN 4: NestJS/Decorators (JavaScript/TypeScript)
  // ============================================
  const decoratorPatterns = [
    /@(Get|Post|Put|Patch|Delete|All)\s*\(\s*['"`]?([^'"`\)]*)/gi,
    /@Controller\s*\(\s*['"`]([^'"`]+)/gi,
  ];

  // ============================================
  // PATTERN PHP: CodeIgniter, Laravel, Symfony routes
  // ============================================
  const phpPatterns = [
    // CodeIgniter 4: $routes->get('path', 'Controller::method')
    /\$routes\s*->\s*(get|post|put|patch|delete|add|match|cli)\s*\(\s*['"]([^'"]+)['"]/gi,
    // CodeIgniter 4: $routes->group()
    /\$routes\s*->\s*group\s*\(\s*['"]([^'"]+)['"]/gi,
    // CodeIgniter 3: $route['path'] = 'controller/method'
    /\$route\s*\[\s*['"]([^'"]+)['"]\s*\]/gi,
    // Laravel: Route::get('path', ...)
    /Route\s*::\s*(get|post|put|patch|delete|any|match|resource|apiResource)\s*\(\s*['"]([^'"]+)['"]/gi,
    // Laravel route groups
    /Route\s*::\s*(prefix|group|middleware)\s*\(\s*['"]([^'"]+)['"]/gi,
    // Symfony annotations: @Route("/path")
    /@Route\s*\(\s*['"]([^'"]+)['"]/gi,
    // Symfony PHP 8 attributes: #[Route('/path')]
    /#\[Route\s*\(\s*['"]([^'"]+)['"]/gi,
    // PHP method definitions in controllers (for convention-based routing)
    /public\s+function\s+(\w+)\s*\([^)]*\)/gi,
    // PHP class definitions (to get controller names)
    /class\s+(\w+)\s+extends\s+(\w*Controller|CI_Controller|BaseController|ResourceController)/gi,
  ];

  // ============================================
  // PATTERN Python: Flask, FastAPI, Django
  // ============================================
  const pythonPatterns = [
    // Flask: @app.route('/path')
    /@app\.(route|get|post|put|patch|delete)\s*\(\s*['"]([^'"]+)['"]/gi,
    // Flask Blueprint: @blueprint.route('/path')
    /@\w+\.(route|get|post|put|patch|delete)\s*\(\s*['"]([^'"]+)['"]/gi,
    // FastAPI: @app.get('/path'), @router.post('/path')
    /@(app|router)\.(get|post|put|patch|delete|api_route)\s*\(\s*['"]([^'"]+)['"]/gi,
    // Django: path('route/', view)
    /path\s*\(\s*['"]([^'"]+)['"]/gi,
    // Django: url(r'^route/$', view)
    /url\s*\(\s*r?['"]([^'"]+)['"]/gi,
  ];

  // ============================================
  // PATTERN Ruby: Rails, Sinatra
  // ============================================
  const rubyPatterns = [
    // Rails: get '/path', post '/path', resources :items
    /^\s*(get|post|put|patch|delete|resources|resource|root|match)\s+['":]/gim,
    // Rails namespace/scope
    /(namespace|scope)\s+[:'"](\w+)/gi,
    // Sinatra: get '/path' do
    /^\s*(get|post|put|patch|delete)\s+['"]([^'"]+)['"]\s+do/gim,
  ];

  // ============================================
  // PATTERN Go: Gin, Echo, Chi, net/http
  // ============================================
  const goPatterns = [
    // Gin: r.GET("/path", handler), router.POST("/path", handler)
    /\.(GET|POST|PUT|PATCH|DELETE|Handle|Any|Group)\s*\(\s*["']([^"']+)["']/gi,
    // Echo: e.GET("/path", handler)
    /e\.(GET|POST|PUT|PATCH|DELETE|Any|Group)\s*\(\s*["']([^"']+)["']/gi,
    // Chi: r.Get("/path", handler), r.Route("/path", ...)
    /r\.(Get|Post|Put|Patch|Delete|Route|Group|Mount)\s*\(\s*["']([^"']+)["']/gi,
    // net/http: http.HandleFunc("/path", handler)
    /http\.(HandleFunc|Handle)\s*\(\s*["']([^"']+)["']/gi,
    // Gorilla mux: r.HandleFunc("/path", handler).Methods("GET")
    /HandleFunc\s*\(\s*["']([^"']+)["']/gi,
  ];

  // ============================================
  // PATTERN Java/Kotlin: Spring Boot
  // ============================================
  const javaPatterns = [
    // Spring: @GetMapping("/path"), @PostMapping("/path")
    /@(GetMapping|PostMapping|PutMapping|PatchMapping|DeleteMapping|RequestMapping)\s*\(\s*(?:value\s*=\s*)?["']?([^"'\)]+)/gi,
    // Spring: @RequestMapping(method = RequestMethod.GET)
    /@RequestMapping\s*\([^)]*method\s*=\s*RequestMethod\.(GET|POST|PUT|PATCH|DELETE)/gi,
    // JAX-RS: @GET, @POST, @Path("/path")
    /@(GET|POST|PUT|PATCH|DELETE|Path)\s*(?:\(\s*["']([^"']+)["']\s*\))?/gi,
  ];

  // ============================================
  // PATTERN 5: Frontend HTTP calls
  // ============================================
  const httpCallPatterns = [
    // fetch() calls
    /fetch\s*\(\s*[`'"](.*?)[`'"]/g,
    /fetch\s*\(\s*`([^`]*)`/g,
    // axios calls
    /axios\.(get|post|put|patch|delete)\s*\(\s*[`'"](.*?)[`'"]/g,
    /axios\s*\(\s*\{[^}]*url\s*:\s*[`'"](.*?)[`'"]/g,
    // Angular HttpClient
    /this\.http\.(get|post|put|patch|delete)\s*[<(]/g,
    /httpClient\.(get|post|put|patch|delete)\s*[<(]/g,
    // jQuery ajax
    /\$\.(ajax|get|post)\s*\(\s*[`'"](.*?)[`'"]/g,
    // Generic request libraries
    /request\.(get|post|put|patch|delete)\s*\(/g,
    /got\.(get|post|put|patch|delete)\s*\(/g,
    /superagent\.(get|post|put|patch|delete)\s*\(/g,
  ];

  // ============================================
  // PATTERN 6: API URL definitions
  // ============================================
  const urlPatterns = [
    // API endpoints in strings
    /['"`](\/api\/[^'"`\s]+)['"`]/g,
    /['"`](\/v\d+\/[^'"`\s]+)['"`]/g,
    /['"`](https?:\/\/[^'"`\s]*\/api[^'"`\s]*)['"`]/g,
    // Base URL definitions
    /(?:API_BASE|API_URL|BASE_URL|baseURL|apiUrl|apiBase|API_ENDPOINT|BACKEND_URL)\s*[:=]\s*['"`]([^'"`]+)['"`]/gi,
  ];

  // ============================================
  // PATTERN 7: Method + URL combinations
  // ============================================
  const methodUrlPatterns = [
    /(GET|POST|PUT|PATCH|DELETE)\s*[,:]?\s*['"`](\/[^'"`]+)['"`]/gi,
    /method:\s*['"`](GET|POST|PUT|PATCH|DELETE)['"`]/gi,
  ];

  // Combine all patterns for line scanning (all languages)
  const allPatterns = [
    ...routerPatterns,
    ...decoratorPatterns,
    ...phpPatterns,
    ...pythonPatterns,
    ...rubyPatterns,
    ...goPatterns,
    ...javaPatterns,
    ...httpCallPatterns,
    ...urlPatterns,
    ...methodUrlPatterns
  ];

  // The patterns carry the g flag, so test() is stateful: always reset
  // lastIndex before reusing one on a new string.
  const matchesAnyPattern = (text) => allPatterns.some(pattern => {
    pattern.lastIndex = 0;
    return pattern.test(text);
  });

  // If this is a route/controller file with swagger docs, include the whole file
  if (isRouteFile && swaggerBlocks.length > 0) {
    // Include entire file content (truncated if too long)
    return fullContentResult('Route/Controller file with Swagger docs', 300);
  }

  // If this is a route file without swagger, still include more content
  if (isRouteFile && matchesAnyPattern(content)) {
    return fullContentResult('Route/Controller file', 200);
  }

  // If this is a DTO/types file, include full content
  if (isDtoFile && typeBlocks.length > 0) {
    return fullContentResult('DTO/Types file', 150);
  }

  // For non-route files, extract relevant sections
  // First, extract imports and base URL definitions
  const baseUrlLines = [];

  for (let i = 0; i < Math.min(lines.length, 50); i++) {
    const line = lines[i];
    if (/(?:API_BASE|API_URL|BASE_URL|baseURL|apiUrl|BACKEND)/i.test(line)) {
      baseUrlLines.push(`${i + 1}: ${line}`);
    }
  }

  if (baseUrlLines.length > 0) {
    extractedBlocks.push('// Base URL definitions:\n' + baseUrlLines.join('\n'));
    hasApiPatterns = true;
  }

  // Common API keywords (all languages). Built once per call instead of once
  // per scanned line; no g flag, so test() is stateless.
  const keywordRegex = new RegExp([
    // JavaScript/TypeScript
    '\\.get\\(', '\\.post\\(', '\\.put\\(', '\\.patch\\(', '\\.delete\\(',
    'fetch\\(', 'axios', '\\/api\\/', 'endpoint',
    '@Get', '@Post', '@Put', '@Delete',
    // PHP (CodeIgniter, Laravel)
    '\\$routes\\s*->', 'Route\\s*::', '@Route',
    '\\$this->input->', '\\$this->request->', // CodeIgniter input
    '\\$request->', '\\$_POST', '\\$_GET', '\\$_PUT', // PHP request data
    'public\\s+function\\s+\\w+', // PHP public methods (potential endpoints)
    'extends\\s+\\w*Controller', // PHP controller classes
    // Python (Flask, FastAPI, Django)
    '@app\\.route', '@app\\.get', '@app\\.post', '@router\\.',
    'path\\s*\\(', 'url\\s*\\(',
    // Ruby (Rails)
    'resources\\s+:', 'get\\s+[\'"]/', 'post\\s+[\'"]/',
    // Go (Gin, Echo, Chi)
    '\\.GET\\(', '\\.POST\\(', 'HandleFunc\\(',
    // Java/Kotlin (Spring)
    '@GetMapping', '@PostMapping', '@RequestMapping', '@Path'
  ].join('|'), 'i');

  // Line-by-line extraction with context
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];

    if (!matchesAnyPattern(line) && !keywordRegex.test(line)) {
      continue;
    }

    hasApiPatterns = true;

    // Determine context needed
    const isPostOrPut = /post|put|patch/i.test(line);
    const contextBefore = isPostOrPut ? 20 : 5; // More context for mutations
    const contextAfter = isPostOrPut ? 10 : 5;

    const startLine = Math.max(0, i - contextBefore);
    const endLine = Math.min(lines.length - 1, i + contextAfter);

    const block = lines.slice(startLine, endLine + 1)
      .map((l, idx) => `${startLine + idx + 1}: ${l}`)
      .join('\n');

    extractedBlocks.push(block);

    // Skip ahead to avoid duplicates
    i = endLine;
  }

  if (hasApiPatterns && extractedBlocks.length > 0) {
    // Deduplicate blocks
    const uniqueBlocks = [...new Set(extractedBlocks)];
    return {
      hasApiPatterns: true,
      content: `// File: ${filePath}\n// API patterns extracted:\n\n${uniqueBlocks.join('\n\n// ---\n\n')}`
    };
  }

  return { hasApiPatterns: false, content: '' };
}
235
1343
 
236
1344
  module.exports = { scanCodebase };