contextforge-cli-harshil 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/analyze.js ADDED
@@ -0,0 +1,432 @@
1
+ /**
2
+ * analyze.js
3
+ * Analyzes scanned files to detect framework, architecture, and important modules.
4
+ * Pure heuristic scoring — no embeddings, no external services.
5
+ */
6
+
7
+ import { readFileSafe } from './scan.js';
8
+ import { logger } from './utils.js';
9
+
10
+ // ── Framework / Stack detection signals ──────────────────────────────────────
11
+
/**
 * Detection table for the tech stack.
 *
 * Each entry pairs a regular expression with a human-readable label and a
 * category. The patterns look for a *quoted* package name, which is how a
 * dependency appears as a key in package.json text.
 *
 * Scoped packages must include their `@scope/` prefix inside the quotes,
 * otherwise the opening quote can never sit directly before the name.
 */
const FRAMEWORK_SIGNALS = [
  // Backend
  { pattern: /["']express["']/i, label: 'Express.js', category: 'backend' },
  { pattern: /["']fastify["']/i, label: 'Fastify', category: 'backend' },
  { pattern: /["']hono["']/i, label: 'Hono', category: 'backend' },
  { pattern: /["']koa["']/i, label: 'Koa.js', category: 'backend' },
  // NestJS is published as the scoped package "@nestjs/core"; the optional
  // "@" keeps the previous (unscoped) spelling matching as well.
  { pattern: /["']@?nestjs\/core["']/i, label: 'NestJS', category: 'backend' },
  { pattern: /["']@hapi\/hapi["']/i, label: 'Hapi.js', category: 'backend' },

  // Frontend
  { pattern: /["']next["']/i, label: 'Next.js', category: 'frontend' },
  { pattern: /["']nuxt["']/i, label: 'Nuxt.js', category: 'frontend' },
  { pattern: /["']react["']/i, label: 'React', category: 'frontend' },
  { pattern: /["']vue["']/i, label: 'Vue.js', category: 'frontend' },
  { pattern: /["']svelte["']/i, label: 'Svelte', category: 'frontend' },
  { pattern: /["']solid-js["']/i, label: 'SolidJS', category: 'frontend' },
  { pattern: /["']astro["']/i, label: 'Astro', category: 'frontend' },
  // Remix ships as "@remix-run/<pkg>"; the bare name is kept for compatibility.
  { pattern: /["'](remix|@remix-run\/[^"']+)["']/i, label: 'Remix', category: 'frontend' },
  { pattern: /["']@angular\/core["']/i, label: 'Angular', category: 'frontend' },

  // Build tools
  { pattern: /["']vite["']/i, label: 'Vite', category: 'build' },
  { pattern: /["']webpack["']/i, label: 'Webpack', category: 'build' },
  { pattern: /["']esbuild["']/i, label: 'esbuild', category: 'build' },
  { pattern: /["']turbopack["']/i, label: 'Turbopack', category: 'build' },

  // ORMs / Databases
  { pattern: /["']@prisma\/client["']/i, label: 'Prisma ORM', category: 'database' },
  { pattern: /["']mongoose["']/i, label: 'Mongoose (MongoDB)', category: 'database' },
  { pattern: /["']typeorm["']/i, label: 'TypeORM', category: 'database' },
  { pattern: /["']drizzle-orm["']/i, label: 'Drizzle ORM', category: 'database' },
  { pattern: /["']sequelize["']/i, label: 'Sequelize', category: 'database' },
  { pattern: /["']knex["']/i, label: 'Knex.js', category: 'database' },
  { pattern: /["']pg["']/i, label: 'PostgreSQL (pg)', category: 'database' },
  { pattern: /["']mysql2["']/i, label: 'MySQL (mysql2)', category: 'database' },
  { pattern: /["']better-sqlite3["']/i, label: 'SQLite (better-sqlite3)', category: 'database' },
  { pattern: /["']redis["']/i, label: 'Redis', category: 'database' },
  { pattern: /["']ioredis["']/i, label: 'Redis (ioredis)', category: 'database' },
  { pattern: /["']mongodb["']/i, label: 'MongoDB Driver', category: 'database' },

  // Auth
  { pattern: /["']jsonwebtoken["']/i, label: 'JWT (jsonwebtoken)', category: 'auth' },
  { pattern: /["']passport["']/i, label: 'Passport.js', category: 'auth' },
  { pattern: /["']next-auth["']/i, label: 'NextAuth.js', category: 'auth' },
  { pattern: /["']@auth\/core["']/i, label: 'Auth.js', category: 'auth' },
  { pattern: /["']bcrypt["']/i, label: 'bcrypt', category: 'auth' },
  // Clerk SDKs live under the "@clerk/" scope (e.g. "@clerk/nextjs").
  { pattern: /["'](clerk|@clerk\/[^"']+)["']/i, label: 'Clerk', category: 'auth' },
  { pattern: /["']@supabase\/supabase-js["']/i, label: 'Supabase', category: 'auth' },
  { pattern: /["']firebase["']/i, label: 'Firebase', category: 'auth' },

  // State / Query
  { pattern: /["']zustand["']/i, label: 'Zustand', category: 'state' },
  { pattern: /["']jotai["']/i, label: 'Jotai', category: 'state' },
  { pattern: /["']@reduxjs\/toolkit["']/i, label: 'Redux Toolkit', category: 'state' },
  { pattern: /["']@tanstack\/react-query["']/i, label: 'TanStack Query', category: 'state' },
  { pattern: /["']swr["']/i, label: 'SWR', category: 'state' },

  // Validation
  { pattern: /["']zod["']/i, label: 'Zod', category: 'validation' },
  { pattern: /["']joi["']/i, label: 'Joi', category: 'validation' },
  { pattern: /["']yup["']/i, label: 'Yup', category: 'validation' },
  { pattern: /["']class-validator["']/i, label: 'class-validator', category: 'validation' },

  // Testing
  { pattern: /["']jest["']/i, label: 'Jest', category: 'testing' },
  { pattern: /["']vitest["']/i, label: 'Vitest', category: 'testing' },
  { pattern: /["']mocha["']/i, label: 'Mocha', category: 'testing' },
  { pattern: /["']@playwright\/test["']/i, label: 'Playwright', category: 'testing' },
  { pattern: /["']cypress["']/i, label: 'Cypress', category: 'testing' },

  // CSS
  { pattern: /["']tailwindcss["']/i, label: 'Tailwind CSS', category: 'styling' },
  { pattern: /["']styled-components["']/i, label: 'styled-components', category: 'styling' },
  { pattern: /["']@emotion\/react["']/i, label: 'Emotion CSS', category: 'styling' },

  // Misc
  { pattern: /["']graphql["']/i, label: 'GraphQL', category: 'api' },
  { pattern: /["']@trpc\/server["']/i, label: 'tRPC', category: 'api' },
  { pattern: /["']axios["']/i, label: 'Axios', category: 'http' },
  { pattern: /["']socket\.io["']/i, label: 'Socket.IO', category: 'realtime' },
];
93
+
94
+ // ── File importance scoring ──────────────────────────────────────────────────
95
+
/**
 * Basenames that earn a file the "high-importance filename" bonus.
 * Consumers compare case-insensitively.
 */
const HIGH_IMPORTANCE_FILENAMES = [
  // Application entry points
  'app.js',
  'app.ts',
  'server.js',
  'server.ts',
  'main.js',
  'main.ts',
  'index.js',
  'index.ts',
  // Project manifest and docs
  'package.json',
  'README.md',
  'readme.md',
  // Database / container setup
  'schema.prisma',
  'docker-compose.yml',
  'docker-compose.yaml',
  'Dockerfile',
  // Environment templates
  '.env.example',
  '.env.sample',
  // Compiler / editor configuration
  'tsconfig.json',
  'jsconfig.json',
];

/**
 * Directory names that earn a file the "high-importance directory" bonus.
 */
const HIGH_IMPORTANCE_DIRS = [
  'auth',
  'middleware',
  'middlewares',
  'routes',
  'prisma',
  'models',
  'schemas',
  'services',
  'controllers',
  'config',
  'api',
  'hooks',
  'store',
  'context',
];

/**
 * Content patterns that each add to a file's score when they match.
 * None of them carry the `g` flag, so repeated `.test()` calls are stateless.
 */
const HIGH_IMPORTANCE_CONTENT_PATTERNS = [
  // HTTP route registration
  /router\.(get|post|put|patch|delete|use)\s*\(/i,
  /app\.(get|post|put|patch|delete|use)\s*\(/i,
  // Module exports
  /module\.exports\s*=/,
  /export\s+default/,
  /export\s+async\s+function/,
  // Data access
  /prisma\./i,
  /mongoose\.model/i,
  /sequelize\.define/i,
  // Authentication
  /jwt\.(sign|verify)/i,
  /passport\.use/i,
  // Architectural vocabulary
  /middleware/i,
  /controller/i,
  /service/i,
  /repository/i,
  /schema/i,
];
127
+
/**
 * Score a file for importance (higher = more important).
 *
 * The score is the sum of independent heuristics:
 *   +10  basename is one of HIGH_IMPORTANCE_FILENAMES (case-insensitive)
 *   +7   any *directory* segment of the path is in HIGH_IMPORTANCE_DIRS
 *   +3 / +2 / +1  source / config-or-schema / documentation extension
 *   +5   basename contains an architectural keyword
 *   +2   per HIGH_IMPORTANCE_CONTENT_PATTERNS entry matching the file content
 *
 * @param {{ path: string, absolutePath: string }} file
 *   `path` is split on '/' to obtain the directories and the basename.
 * @returns {number}
 */
function scoreFile(file) {
  const segments = file.path.toLowerCase().split('/');
  const filename = segments[segments.length - 1];
  // Only the parent directories take part in the directory bonus; the
  // basename itself is not a directory, so an extension-less file called
  // "config" must not be treated as living inside a config/ folder.
  const directories = segments.slice(0, -1);

  let score = 0;

  // High-importance filename match
  if (HIGH_IMPORTANCE_FILENAMES.some((name) => name.toLowerCase() === filename)) {
    score += 10;
  }

  // High-importance directory match
  if (HIGH_IMPORTANCE_DIRS.some((dir) => directories.includes(dir))) {
    score += 7;
  }

  // File extension bonus
  if (/\.(js|ts|jsx|tsx|mjs|cjs)$/.test(filename)) score += 3;
  if (/\.(json|yaml|yml|prisma|sql|graphql|gql)$/.test(filename)) score += 2;
  if (/\.(md|mdx|txt)$/.test(filename)) score += 1;

  // Filename keyword bonus
  const keywords = [
    'auth', 'route', 'router', 'controller', 'service', 'middleware',
    'model', 'schema', 'config', 'database', 'db', 'store', 'hook',
    'api', 'handler', 'guard', 'interceptor', 'repository', 'prisma',
  ];
  if (keywords.some((kw) => filename.includes(kw))) score += 5;

  // Content-based scoring — only incur the disk read for files that already
  // look interesting from their name/path (score >= 5). This avoids reading
  // every file in the repo on large codebases.
  if (score >= 5) {
    const content = readFileSafe(file.absolutePath, 10_000);
    if (content) {
      for (const pattern of HIGH_IMPORTANCE_CONTENT_PATTERNS) {
        if (pattern.test(content)) score += 2;
      }
    }
  }

  return score;
}
177
+
/**
 * Build the text that framework patterns should be matched against: one
 * JSON-quoted dependency name per line, taken from the dependency sections
 * of a package.json document.
 *
 * @param {string} pkgContent Raw package.json text.
 * @returns {string|null} Quoted dependency names, or null when the text is
 *   not a JSON object (the caller then falls back to the raw text).
 */
function quotedDependencyNames(pkgContent) {
  let manifest;
  try {
    manifest = JSON.parse(pkgContent);
  } catch {
    // Malformed JSON is expected input here; the caller degrades to a
    // best-effort scan of the raw text instead of failing.
    return null;
  }
  if (manifest === null || typeof manifest !== 'object' || Array.isArray(manifest)) {
    return null;
  }

  const sections = ['dependencies', 'devDependencies', 'peerDependencies', 'optionalDependencies'];
  const names = [];
  for (const section of sections) {
    const entries = manifest[section];
    if (entries !== null && typeof entries === 'object') {
      names.push(...Object.keys(entries));
    }
  }
  return names.map((name) => JSON.stringify(name)).join('\n');
}

/**
 * Detect the tech stack from package.json content.
 *
 * Patterns from FRAMEWORK_SIGNALS are matched against the names of declared
 * dependencies only. Matching the whole file would report a framework merely
 * because it is mentioned in `name`, `keywords`, or similar metadata. When
 * the content cannot be parsed as a JSON object, the raw text is scanned
 * instead so detection stays best-effort.
 *
 * @param {string|null} pkgContent
 * @returns {{ stack: string[], byCategory: Object }}
 *   `stack` lists matched labels in FRAMEWORK_SIGNALS order; `byCategory`
 *   groups the same labels under their category.
 */
function detectStack(pkgContent) {
  if (!pkgContent) return { stack: [], byCategory: {} };

  const haystack = quotedDependencyNames(pkgContent) ?? pkgContent;
  const byCategory = {};
  const stack = [];

  for (const { pattern, label, category } of FRAMEWORK_SIGNALS) {
    if (!pattern.test(haystack)) continue;
    stack.push(label);
    if (!byCategory[category]) byCategory[category] = [];
    byCategory[category].push(label);
  }

  return { stack, byCategory };
}
199
+
/**
 * Path fragments and the architecture label they indicate, in report order.
 * A label is reported when any scanned path contains any of its fragments.
 */
const ARCHITECTURE_PATH_RULES = [
  [['/routes/', '/route.'], 'Route-based API structure'],
  [['/controllers/', '/controller.'], 'MVC / Controller pattern'],
  [['/services/', '/service.'], 'Service layer pattern'],
  [['/repositories/', '/repository.'], 'Repository pattern'],
  [['/middleware/', '/middlewares/'], 'Middleware-based request handling'],
  [['/models/', '/model.'], 'Data model abstraction'],
  [['/hooks/'], 'Custom React hooks'],
  [['/store/'], 'Centralized state management'],
  [['/components/'], 'Component-based UI architecture'],
  [['/pages/', '/app/'], 'File-system based routing'],
  [['prisma/schema'], 'Prisma schema-driven database'],
  [['/graphql/', '.graphql'], 'GraphQL API layer'],
  [['/api/'], 'Dedicated API directory'],
  [['docker-compose', 'dockerfile'], 'Containerized deployment (Docker)'],
];

/**
 * Detect architecture patterns from file paths.
 *
 * Paths are lower-cased and given a leading '/' before matching. Scanned
 * paths are repo-relative (e.g. "routes/users.js"), so without that prefix a
 * fragment such as "/routes/" could only match nested directories and every
 * top-level folder would be missed.
 *
 * @param {Array<{path:string, absolutePath:string}>} files
 * @returns {string[]} Matching labels, each at most once, in rule order.
 */
function detectArchitecturePatterns(files) {
  const paths = files.map((f) => `/${f.path.toLowerCase()}`);

  return ARCHITECTURE_PATH_RULES
    .filter(([fragments]) =>
      paths.some((p) => fragments.some((fragment) => p.includes(fragment))))
    .map(([, label]) => label);
}
255
+
/**
 * Infer which authentication mechanisms a project uses.
 *
 * Only files whose path mentions "auth", "middleware" or "guard"
 * (case-insensitive) are read, up to 20,000 characters each. Every indicator
 * whose pattern matches a file's content contributes its label once.
 *
 * @param {Array<{path:string, absolutePath:string}>} files
 * @returns {string[]} Distinct labels in order of first detection.
 */
function detectAuthApproach(files) {
  const pathHints = ['auth', 'middleware', 'guard'];
  const indicators = [
    [/jwt\.(sign|verify)/i, 'JWT token authentication'],
    [/passport\./i, 'Passport.js authentication'],
    [/bcrypt/i, 'Password hashing (bcrypt)'],
    [/refresh.?token/i, 'Refresh token rotation'],
    [/session/i, 'Session-based authentication'],
    [/oauth/i, 'OAuth integration'],
    [/clerk/i, 'Clerk authentication'],
    [/supabase/i, 'Supabase Auth'],
    [/next-auth|nextauth/i, 'NextAuth.js'],
    [/role|permission|rbac/i, 'Role-based access control (RBAC)'],
    [/api.?key/i, 'API key authentication'],
    [/bearer/i, 'Bearer token scheme'],
  ];

  const found = new Set();

  for (const file of files) {
    const loweredPath = file.path.toLowerCase();
    if (!pathHints.some((hint) => loweredPath.includes(hint))) continue;

    const text = readFileSafe(file.absolutePath, 20_000);
    if (!text) continue;

    for (const [regex, label] of indicators) {
      if (regex.test(text)) found.add(label);
    }
  }

  return [...found];
}
290
+
291
+ /**
292
+ * Detect coding conventions from file contents.
293
+ * @param {Array<{path:string, absolutePath:string}>} files
294
+ * @returns {string[]}
295
+ */
296
+ function detectCodingConventions(files) {
297
+ const conventions = new Set();
298
+
299
+ // Sample a few important JS/TS files
300
+ const sampleFiles = files
301
+ .filter((f) => /\.(js|ts|jsx|tsx|mjs)$/.test(f.path))
302
+ .slice(0, 15);
303
+
304
+ let asyncCount = 0;
305
+ let callbackCount = 0;
306
+ let classCount = 0;
307
+ let functionalCount = 0;
308
+ let esModuleCount = 0;
309
+ let commonJsCount = 0;
310
+ let typeScriptCount = 0;
311
+
312
+ // Fix #5: read each file once and cache — reused below for allContent
313
+ const contentCache = [];
314
+
315
+ sampleFiles.forEach((file) => {
316
+ const content = readFileSafe(file.absolutePath, 15_000);
317
+ if (!content) return;
318
+
319
+ contentCache.push(content); // store for reuse
320
+
321
+ if (/async\s+function|await\s+/i.test(content)) asyncCount++;
322
+ if (/\.then\(|\.catch\(|callback/i.test(content)) callbackCount++;
323
+ if (/^class\s+/m.test(content)) classCount++;
324
+ if (/^(export\s+)?(const|function)\s+/m.test(content)) functionalCount++;
325
+ if (/^import\s+/m.test(content)) esModuleCount++;
326
+ if (/require\s*\(/m.test(content)) commonJsCount++;
327
+ if (/:\s*(string|number|boolean|void|Promise<)/i.test(content)) typeScriptCount++;
328
+ if (/interface\s+\w+|type\s+\w+\s*=/i.test(content)) typeScriptCount++;
329
+ });
330
+
331
+ if (asyncCount > callbackCount) conventions.add('async/await pattern');
332
+ if (callbackCount > asyncCount) conventions.add('Callback-based async');
333
+ if (esModuleCount > commonJsCount) conventions.add('ES Modules (import/export)');
334
+ if (commonJsCount > esModuleCount) conventions.add('CommonJS (require/exports)');
335
+ if (classCount > functionalCount) conventions.add('Class-based architecture');
336
+ if (functionalCount > classCount) conventions.add('Functional programming style');
337
+ if (typeScriptCount > 3) conventions.add('TypeScript with strict typing');
338
+
339
+ // Fix #5: reuse cached content — no second disk read
340
+ const allContent = contentCache.join('\n');
341
+
342
+ // Fix #7: ReDoS-safe try/catch detection — two simple non-backtracking patterns
343
+ // instead of the vulnerable /try\s*\{[\s\S]*?\}\s*catch/i
344
+ if (/\btry\s*\{/.test(allContent) && /\}\s*catch\s*[({]/.test(allContent)) {
345
+ conventions.add('try/catch error handling');
346
+ }
347
+ if (/\.env|process\.env/i.test(allContent)) conventions.add('Environment variable configuration');
348
+ if (/console\.(log|error|warn)/i.test(allContent)) conventions.add('Console-based logging');
349
+ if (/zod|joi|yup/i.test(allContent)) conventions.add('Schema validation');
350
+ if (/\.test\.|\.spec\./i.test(allContent)) conventions.add('Unit test coverage');
351
+
352
+ return Array.from(conventions);
353
+ }
354
+
/**
 * Main analysis function.
 *
 * Reads the root package.json (when present), detects the stack, ranks files
 * by importance and gathers architecture, auth and convention signals into a
 * single result object.
 *
 * @param {Array<{path:string, absolutePath:string}>} files
 *   Scanned files; the manifest is looked up by the exact path 'package.json'.
 * @param {string} cwd
 *   Project directory. Its last non-empty segment is the fallback project
 *   name; both '/' and '\' separators and a trailing separator are accepted.
 * @returns {Object} Analysis result
 */
export function analyzeRepository(files, cwd) {
  // Read package.json
  const pkgFile = files.find((f) => f.path === 'package.json');
  const pkgContent = pkgFile ? readFileSafe(pkgFile.absolutePath) : null;

  // A malformed package.json is reported rather than silently ignored; the
  // analysis continues with whatever can still be derived.
  let pkgJson = null;
  try {
    pkgJson = pkgContent ? JSON.parse(pkgContent) : null;
  } catch (err) {
    logger.warn(`Could not parse package.json: ${err.message}`);
    logger.dim('Stack detection and project metadata will be unavailable.');
  }

  // Detect stack
  const { stack, byCategory } = detectStack(pkgContent);

  // Score and rank files by importance
  const scoredFiles = files
    .map((f) => ({ ...f, score: scoreFile(f) }))
    .sort((a, b) => b.score - a.score);

  // Top important files
  const importantFiles = scoredFiles.filter((f) => f.score >= 5).slice(0, 30);

  // Architecture patterns
  const architecturePatterns = detectArchitecturePatterns(files);

  // Auth approach
  const authApproach = detectAuthApproach(files);

  // Coding conventions
  const codingConventions = detectCodingConventions(files);

  // Project name — empty segments are dropped so a trailing separator
  // ("/home/me/app/") still yields "app" instead of an empty name.
  const cwdSegments = cwd.split(/[\\/]/).filter(Boolean);
  const projectName = pkgJson?.name || cwdSegments.pop() || 'Unknown Project';
  const projectVersion = pkgJson?.version || null;
  const projectDescription = pkgJson?.description || null;

  // Scripts
  const scripts = pkgJson?.scripts ? Object.keys(pkgJson.scripts) : [];

  // Language detection — .jsx counts as JavaScript, mirroring how .tsx
  // counts as TypeScript.
  const hasTS = files.some((f) => /\.tsx?$/.test(f.path));
  const hasJS = files.some((f) => /\.(js|jsx|mjs|cjs)$/.test(f.path));
  const language = hasTS ? 'TypeScript' : hasJS ? 'JavaScript' : 'Unknown';

  // Module system — Node treats a package without a "type" field as CommonJS.
  const moduleSystem =
    pkgJson?.type === 'module'
      ? 'ES Modules'
      : pkgJson?.type === 'commonjs'
        ? 'CommonJS'
        : 'CommonJS (default)';

  return {
    projectName,
    projectVersion,
    projectDescription,
    language,
    moduleSystem,
    stack,
    byCategory,
    architecturePatterns,
    authApproach,
    codingConventions,
    scripts,
    importantFiles,
    allFiles: files,
    totalFiles: files.length,
    pkgJson,
  };
}