codecritique 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +1145 -0
  3. package/package.json +98 -0
  4. package/src/content-retrieval.js +747 -0
  5. package/src/custom-documents.js +597 -0
  6. package/src/embeddings/cache-manager.js +364 -0
  7. package/src/embeddings/constants.js +40 -0
  8. package/src/embeddings/database.js +921 -0
  9. package/src/embeddings/errors.js +208 -0
  10. package/src/embeddings/factory.js +447 -0
  11. package/src/embeddings/file-processor.js +851 -0
  12. package/src/embeddings/model-manager.js +337 -0
  13. package/src/embeddings/similarity-calculator.js +97 -0
  14. package/src/embeddings/types.js +113 -0
  15. package/src/feedback-loader.js +384 -0
  16. package/src/index.js +1418 -0
  17. package/src/llm.js +123 -0
  18. package/src/pr-history/analyzer.js +579 -0
  19. package/src/pr-history/bot-detector.js +123 -0
  20. package/src/pr-history/cli-utils.js +204 -0
  21. package/src/pr-history/comment-processor.js +549 -0
  22. package/src/pr-history/database.js +819 -0
  23. package/src/pr-history/github-client.js +629 -0
  24. package/src/project-analyzer.js +955 -0
  25. package/src/rag-analyzer.js +2764 -0
  26. package/src/rag-review.js +566 -0
  27. package/src/technology-keywords.json +753 -0
  28. package/src/utils/command.js +48 -0
  29. package/src/utils/constants.js +263 -0
  30. package/src/utils/context-inference.js +364 -0
  31. package/src/utils/document-detection.js +105 -0
  32. package/src/utils/file-validation.js +271 -0
  33. package/src/utils/git.js +232 -0
  34. package/src/utils/language-detection.js +170 -0
  35. package/src/utils/logging.js +24 -0
  36. package/src/utils/markdown.js +132 -0
  37. package/src/utils/mobilebert-tokenizer.js +141 -0
  38. package/src/utils/pr-chunking.js +276 -0
  39. package/src/utils/string-utils.js +28 -0
  40. package/src/zero-shot-classifier-open.js +392 -0
@@ -0,0 +1,955 @@
1
+ /**
2
+ * Project Architecture Analyzer
3
+ *
4
+ * Analyzes project structure during embedding generation to create comprehensive
5
+ * project summaries that can be used as context during code reviews.
6
+ */
7
+
8
+ import crypto from 'crypto';
9
+ import fs from 'fs';
10
+ import path from 'path';
11
+ import chalk from 'chalk';
12
+ import { getDefaultEmbeddingsSystem } from './embeddings/factory.js';
13
+ import * as llm from './llm.js';
14
+ import { isDocumentationFile, isTestFile } from './utils/file-validation.js';
15
+
16
// Consolidated file classification configuration
//
// Each entry describes one file category consumed by ProjectAnalyzer.matchesFileType():
//   regexes         - patterns tested against the LOWERCASED file name (so the /i
//                     flags below are redundant, but harmless)
//   pathChecks      - substrings matched against the lowercased file path
//   keywords        - substrings matched against either the name or the path
//   excludePatterns - predicate functions (filePath) => boolean that veto any match
// A file matches a category when any regex/pathCheck/keyword matches AND no
// exclusion predicate fires.
const FILE_PATTERNS = {
  // Build/tooling configuration across ecosystems (JS/TS, Python, JVM, Go,
  // Rust, Ruby, PHP, C/C++, Docker, CI).
  config: {
    regexes: [
      /\.config\.(js|ts|json|yaml|yml|toml|ini|conf)$/,
      /^dockerfile$/i,
      /^docker-compose\.(yml|yaml)$/,
      /^makefile$/i,
      /^cmake.*\.txt$/i,
      /^(webpack|vite|babel|rollup|prettier|eslint)\.config/,
      /^(tsconfig|jsconfig)\.json$/,
      /\.(eslintrc|prettierrc|babelrc)/,
      /^(jest|vitest|playwright)\.config/,
      /^(setup|pyproject|tox|pytest)\.((py|toml|ini|cfg))$/,
      /^\.pylintrc$/,
      /^requirements.*\.txt$/,
      /^pipfile(\.lock)?$/i,
      /^pom\.xml$/,
      /^build\.gradle(\.kts)?$/,
      /^gradle\.properties$/,
      /^go\.(mod|sum)$/,
      /^cargo\.(toml|lock)$/i,
      /^gemfile(\.lock)?$/i,
      /^composer\.(json|lock)$/,
      /^cmakelists\.txt$/i,
      /^conanfile\.(txt|py)$/,
      /^vcpkg\.json$/,
    ],
    pathChecks: ['.github/workflows/', '.vscode/', '.devcontainer/'],
    keywords: ['config'],
  },

  // Application entry points / bootstrap files per language.
  entry: {
    regexes: [
      /^(index|main|app|server)\.(js|ts|jsx|tsx|mjs|cjs)$/,
      /^_app\.(js|ts|jsx|tsx)$/,
      /(router|routes|routing)\.(js|ts)$/,
      /^(__main__|main|app|run|manage)\.py$/,
      /^(main|application|app)\.java$/i,
      /^main\.go$/,
      /^(main|lib)\.rs$/,
      /^(main|app)\.rb$/,
      /^(index|app|main)\.php$/,
      /^main\.(c|cpp|cc|cxx)$/,
      /^(run|start|bootstrap)\.(sh|bash|zsh)$/,
    ],
    pathChecks: ['/bin/', '/scripts/'],
    keywords: ['index', 'main'],
  },

  // Dependency manifests and lockfiles (overlaps with `config` intentionally;
  // the generic /-lock\./ and /\.lock$/ patterns catch anything unlisted).
  dependency: {
    regexes: [
      /^package(-lock)?\.json$/,
      /^yarn\.lock$/,
      /^pnpm-lock\.yaml$/,
      /^requirements.*\.txt$/,
      /^pipfile(\.lock)?$/i,
      /^pyproject\.toml$/,
      /^poetry\.lock$/,
      /^pom\.xml$/,
      /^build\.gradle(\.kts)?$/,
      /^gradle\.lockfile$/,
      /^go\.(mod|sum)$/,
      /^cargo\.(toml|lock)$/i,
      /^gemfile(\.lock)?$/i,
      /^composer\.(json|lock)$/,
      /^conanfile\.(txt|py)$/,
      /^vcpkg\.json$/,
      /-lock\.(json|yaml|yml|toml)$/,
      /\.lock$/,
    ],
  },

  // Shared utilities, services, hooks, wrappers — deliberately broad keyword
  // regexes, narrowed by the excludePatterns (tests/docs are vetoed).
  utility: {
    regexes: [
      /(util|utility|helper|service|api|hook|wrapper|component|store|state|common|shared|lib)/i,
      /(core|base|foundation|framework)/i,
      /(middleware|plugin|extension|adapter)/i,
      /(lazy|async|await|promise|retry|preload|loader|chunk|suspend)/i,
      /(context|provider|factory|builder|creator|generator|maker)/i,
      /(error|boundary|fallback|recovery)/i,
    ],
    pathChecks: ['/src/', '/lib/', '/utils/', '/helpers/', '/services/', '/common/', '/shared/', '/core/', '/pkg/', '/internal/'],
    excludePatterns: [isTestFile, isDocumentationFile],
  },

  // Type / interface / schema definition files.
  types: {
    regexes: [
      /(types?|interface|model|schema|definition|contract)/i,
      /\.(d\.ts|types\.ts|interfaces\.ts|models\.ts)$/,
      /(graphql|gql|schema)/i,
    ],
    pathChecks: ['/src/', '/types/', '/models/', '/schemas/', '/lib/'],
    excludePatterns: [isTestFile, isDocumentationFile],
  },
};
112
+
113
// Database query configurations
//
// Each entry drives one candidate search in mineKeyFilesFromEmbeddings():
//   category    - label attached to matched files (also appears in log output)
//   terms       - substrings matched against file content, path, or name
//   whereClause - raw SQL-style LanceDB filter, used INSTEAD of terms
//   limit       - maximum number of rows kept for this category
//   matcher     - type key passed to matchesFileType() for final filtering
const DB_SEARCH_CONFIGS = [
  // Dependency manifests / lockfiles across package ecosystems.
  {
    category: 'package',
    terms: [
      'package.json',
      'package-lock.json',
      'yarn.lock',
      'pnpm-lock.yaml',
      'requirements.txt',
      'pipfile',
      'pyproject.toml',
      'gemfile',
      'cargo.toml',
      'pom.xml',
      'build.gradle',
      'composer.json',
    ],
    limit: 30,
    matcher: 'dependency',
  },
  // Tooling / build configuration files.
  { category: 'config', terms: ['config', 'dockerfile', 'makefile', 'eslint', 'prettier', 'jest'], limit: 30, matcher: 'config' },
  // Entry points, located by name through a raw SQL-style filter.
  {
    category: 'setup',
    whereClause: "name LIKE '%index%' OR name LIKE '%main%' OR name LIKE '%app%' OR name LIKE '%server%'",
    limit: 20,
    matcher: 'entry',
  },
  // Shared utilities / helpers / hooks.
  {
    category: 'utility',
    terms: ['utils', 'helpers', 'common', 'lib', 'hooks', 'wrapper', 'lazy', 'async', 'context', 'provider'],
    limit: 30,
    matcher: 'utility',
  },
  // Frontend-leaning modules (components, lazy loading, error boundaries, ...).
  {
    category: 'frontend',
    terms: [
      'components',
      'lazy',
      'preload',
      'chunk',
      'route',
      'app',
      'wrapper',
      'async',
      'suspense',
      'react',
      'retry',
      'error',
      'boundary',
      'fallback',
      'maker',
    ],
    limit: 30,
    matcher: 'utility',
  },
  // Backend-leaning modules (APIs, resolvers, auth, middleware, ...).
  {
    category: 'backend',
    terms: ['services', 'api', 'graphql', 'resolver', 'schema', 'server', 'database', 'context', 'auth', 'middleware', 'validation'],
    limit: 30,
    matcher: 'utility',
  },
  // Type / interface / model definitions.
  { category: 'types', terms: ['types', 'interfaces', 'models', 'schema', 'definitions'], limit: 15, matcher: 'types' },
  // Documentation files, matched by name via raw filter.
  { category: 'docs', whereClause: "name LIKE '%README%' OR name LIKE '%CHANGELOG%' OR name LIKE '%.md'", limit: 10, matcher: 'docs' },
  // Test files and test configuration.
  {
    category: 'tests',
    whereClause: "name LIKE '%test%' OR name LIKE '%spec%' OR path LIKE '%test%' OR path LIKE '%spec%'",
    limit: 15,
    matcher: 'tests',
  },
];
184
+
185
/**
 * Analyzes a project's architecture (key files, frameworks, custom patterns)
 * and produces an LLM-generated summary used as context during code reviews.
 */
export class ProjectAnalyzer {
  constructor() {
    // LLM client module; lazily assigned on the first analyzeProject() call.
    this.llm = null;
    // Most recently generated summary (null until analyzeProject() succeeds).
    this.projectSummary = null;
    // Key architectural files backing the current summary.
    this.keyFiles = [];
    // Content hash of key files at last analysis, used for change detection.
    this.lastAnalysisHash = null;
  }
192
+
193
+ /**
194
+ * Analyze project structure and generate comprehensive summary
195
+ */
196
+ async analyzeProject(projectPath, options = {}) {
197
+ const { verbose = false, forceAnalysis = false } = options;
198
+
199
+ try {
200
+ if (verbose) {
201
+ console.log(chalk.cyan('🔍 Starting project architecture analysis...'));
202
+ }
203
+
204
+ // Initialize LLM client
205
+ if (!this.llm) {
206
+ this.llm = llm;
207
+ }
208
+
209
+ // Check for existing analysis
210
+ const existingSummary = forceAnalysis ? null : await this.loadExistingAnalysis(projectPath);
211
+ if (existingSummary && !forceAnalysis) {
212
+ const currentHash = await this.calculateKeyFilesHash(existingSummary.keyFiles);
213
+ if (existingSummary.keyFilesHash === currentHash) {
214
+ if (verbose) {
215
+ console.log(chalk.green('✅ Project analysis up-to-date (no key file changes detected)'));
216
+ }
217
+ return existingSummary;
218
+ }
219
+ if (verbose) {
220
+ console.log(chalk.yellow('🔄 Key files changed, regenerating analysis...'));
221
+ }
222
+ } else if (verbose) {
223
+ console.log(
224
+ chalk.cyan(
225
+ forceAnalysis
226
+ ? '🔄 Force analysis requested - regenerating from scratch...'
227
+ : '🆕 First-time analysis - discovering key files...'
228
+ )
229
+ );
230
+ }
231
+
232
+ // Discover or validate key files
233
+ const keyFiles = existingSummary
234
+ ? await this.validateAndUpdateKeyFiles(existingSummary.keyFiles, projectPath)
235
+ : await this.discoverKeyFilesWithLLM(projectPath);
236
+
237
+ if (verbose) {
238
+ console.log(chalk.gray(` Found ${keyFiles.length} key architectural files`));
239
+ console.log(chalk.cyan('🧠 Generating LLM-based project analysis...'));
240
+ }
241
+
242
+ // Generate summary
243
+ const projectSummary = await this.generateProjectSummary(keyFiles, projectPath);
244
+
245
+ // Store results
246
+ const currentHash = await this.calculateKeyFilesHash(keyFiles);
247
+ projectSummary.keyFiles = keyFiles;
248
+ projectSummary.keyFilesHash = currentHash;
249
+
250
+ await this.storeAnalysis(projectPath, projectSummary);
251
+
252
+ this.projectSummary = projectSummary;
253
+ this.keyFiles = keyFiles;
254
+ this.lastAnalysisHash = currentHash;
255
+
256
+ if (verbose) {
257
+ console.log(chalk.green('✅ Project analysis complete'));
258
+ console.log(chalk.gray(` Technologies: ${(projectSummary.technologies || []).join(', ')}`));
259
+ console.log(chalk.gray(` Key patterns: ${(projectSummary.keyPatterns || []).length} identified`));
260
+ console.log(chalk.gray(` Key files tracked: ${keyFiles.length}`));
261
+ }
262
+
263
+ return projectSummary;
264
+ } catch (error) {
265
+ console.error(chalk.red('Error analyzing project:'), error.message);
266
+ return this.createFallbackSummary(projectPath);
267
+ }
268
+ }
269
+
270
+ /**
271
+ * Load existing project analysis from database
272
+ */
273
+ async loadExistingAnalysis(projectPath) {
274
+ try {
275
+ const embeddingsSystem = getDefaultEmbeddingsSystem();
276
+ const summary = await embeddingsSystem.getProjectSummary(projectPath);
277
+
278
+ if (summary && summary.keyFiles) {
279
+ const keyFiles = summary.keyFiles.map((kf) => ({
280
+ relativePath: kf.path,
281
+ fullPath: path.join(projectPath, kf.path),
282
+ category: kf.category,
283
+ size: 0,
284
+ lastModified: new Date(kf.lastModified),
285
+ }));
286
+ return { ...summary, keyFiles };
287
+ }
288
+ return null;
289
+ } catch (error) {
290
+ console.error(chalk.yellow('Warning: Could not load existing analysis:'), error.message);
291
+ return null;
292
+ }
293
+ }
294
+
295
+ /**
296
+ * Store analysis results in database
297
+ */
298
+ async storeAnalysis(projectPath, projectSummary) {
299
+ try {
300
+ const embeddingsSystem = getDefaultEmbeddingsSystem();
301
+ await embeddingsSystem.storeProjectSummary(projectPath, projectSummary);
302
+ console.log(chalk.green('✅ Project analysis stored in database'));
303
+ } catch (error) {
304
+ console.error(chalk.yellow('Warning: Could not store analysis:'), error.message);
305
+ }
306
+ }
307
+
308
+ /**
309
+ * Validate and update existing key files list
310
+ */
311
+ async validateAndUpdateKeyFiles(existingKeyFiles, projectPath) {
312
+ const validatedFiles = [];
313
+
314
+ for (const keyFile of existingKeyFiles) {
315
+ const fullPath = path.join(projectPath, keyFile.relativePath || keyFile.path);
316
+ if (fs.existsSync(fullPath)) {
317
+ const stats = fs.statSync(fullPath);
318
+ validatedFiles.push({
319
+ relativePath: keyFile.relativePath || keyFile.path,
320
+ fullPath,
321
+ category: keyFile.category || 'unknown',
322
+ size: stats.size,
323
+ lastModified: stats.mtime,
324
+ });
325
+ }
326
+ }
327
+
328
+ // If we lost more than 30% of key files, trigger fresh discovery
329
+ if (validatedFiles.length < existingKeyFiles.length * 0.7) {
330
+ console.log(chalk.yellow('⚠️ Many key files missing, performing fresh discovery...'));
331
+ return await this.discoverKeyFilesWithLLM(projectPath);
332
+ }
333
+
334
+ return validatedFiles;
335
+ }
336
+
337
+ /**
338
+ * Discover key architectural files using LanceDB hybrid search
339
+ */
340
+ async discoverKeyFilesWithLLM(projectPath) {
341
+ console.log(chalk.cyan('🔍 Mining codebase embeddings with LanceDB hybrid search...'));
342
+
343
+ const keyFilesByCategory = await this.mineKeyFilesFromEmbeddings(projectPath);
344
+ console.log(chalk.cyan(`🧠 LLM analyzing ${keyFilesByCategory.length} candidates from embedding search...`));
345
+
346
+ const keyFiles = await this.selectFinalKeyFiles(keyFilesByCategory, projectPath);
347
+ return keyFiles;
348
+ }
349
+
350
+ /**
351
+ * Mine key files from embeddings database using unified search approach
352
+ */
353
+ async mineKeyFilesFromEmbeddings(projectPath) {
354
+ const embeddingsSystem = getDefaultEmbeddingsSystem();
355
+ await embeddingsSystem.initialize();
356
+ const db = await embeddingsSystem.databaseManager.getDB();
357
+ const table = await db.openTable(embeddingsSystem.databaseManager.fileEmbeddingsTable);
358
+
359
+ // Optimize table to sync indices with data and prevent TakeExec panics
360
+ try {
361
+ await table.optimize();
362
+ } catch (optimizeError) {
363
+ if (optimizeError.message && optimizeError.message.includes('legacy format')) {
364
+ console.log(chalk.yellow(`Skipping optimization due to legacy index format - will be auto-upgraded during normal operations`));
365
+ } else {
366
+ console.warn(chalk.yellow(`Warning: Failed to optimize file embeddings table: ${optimizeError.message}`));
367
+ }
368
+ }
369
+
370
+ const keyFiles = new Map();
371
+
372
+ try {
373
+ console.log(chalk.gray(` 📊 Using LanceDB hybrid search for project: ${projectPath}`));
374
+
375
+ // Unified query function
376
+ const queryFiles = async (config) => {
377
+ try {
378
+ let query = table.query().select(['path', 'name', 'content', 'type', 'language']);
379
+
380
+ if (config.whereClause) {
381
+ query = query.where(`project_path = '${projectPath}' AND (${config.whereClause})`);
382
+ } else if (config.terms) {
383
+ // For term-based searches, query ALL files and sort by depth to prioritize shallow config files
384
+ const allFiles = await table
385
+ .query()
386
+ .select(['path', 'name', 'content', 'type', 'language'])
387
+ .where(`project_path = '${projectPath}'`)
388
+ .toArray(); // NO LIMIT - get all files
389
+
390
+ // Sort by path depth (shorter paths first) to prioritize config files
391
+ allFiles.sort((a, b) => {
392
+ const depthA = (a.path || '').split('/').length;
393
+ const depthB = (b.path || '').split('/').length;
394
+ return depthA - depthB;
395
+ });
396
+
397
+ // Take only the first 500 after sorting to ensure we have shallow files
398
+ const sortedFiles = allFiles.slice(0, 500);
399
+
400
+ return sortedFiles.filter((result) => {
401
+ const content = (result.content || '').toLowerCase();
402
+ const pathName = (result.path || '').toLowerCase();
403
+ const name = (result.name || '').toLowerCase();
404
+ const matches = config.terms.some(
405
+ (term) => content.includes(term.toLowerCase()) || pathName.includes(term.toLowerCase()) || name.includes(term.toLowerCase())
406
+ );
407
+
408
+ return matches;
409
+ });
410
+ } else {
411
+ query = query.where(`project_path = '${projectPath}'`);
412
+ }
413
+
414
+ return await query.limit(config.limit || 30).toArray();
415
+ } catch (error) {
416
+ console.log(chalk.yellow(` ⚠️ Query failed for ${config.category}: ${error.message}`));
417
+ return [];
418
+ }
419
+ };
420
+
421
+ // Execute all searches
422
+ for (const config of DB_SEARCH_CONFIGS) {
423
+ console.log(chalk.gray(` 🔍 Searching for ${config.category} files...`));
424
+
425
+ const results = await queryFiles(config);
426
+ console.log(chalk.gray(` 📦 Found ${results.length} ${config.category} file candidates`));
427
+
428
+ results.forEach((result) => {
429
+ if (this.matchesFileType(result.path, result.name, config.matcher)) {
430
+ keyFiles.set(result.path, { ...result, category: config.category, source: `${config.category}-search` });
431
+ }
432
+ });
433
+ }
434
+ } catch (error) {
435
+ console.error(chalk.red('Error mining embeddings:'), error.message);
436
+ return [];
437
+ }
438
+
439
+ const results = Array.from(keyFiles.values());
440
+ console.log(chalk.cyan(`🗃️ Found ${results.length} key files from embeddings database`));
441
+ return results;
442
+ }
443
+
444
+ /**
445
+ * Unified file type matching using consolidated patterns
446
+ */
447
+ matchesFileType(filePath, fileName, type) {
448
+ if (type === 'docs') return isDocumentationFile(filePath);
449
+ if (type === 'tests') return isTestFile(filePath);
450
+
451
+ const config = FILE_PATTERNS[type];
452
+ if (!config) return false;
453
+
454
+ const fileNameLower = fileName.toLowerCase();
455
+ const filePathLower = filePath.toLowerCase();
456
+
457
+ // Check regex patterns
458
+ const matchesRegex = config.regexes?.some((pattern) => pattern.test(fileNameLower));
459
+
460
+ // Check path conditions
461
+ const matchesPath = config.pathChecks?.some((pathCheck) => filePathLower.includes(pathCheck.toLowerCase()));
462
+
463
+ // Check keywords
464
+ const matchesKeywords = config.keywords?.some((keyword) => fileNameLower.includes(keyword) || filePathLower.includes(keyword));
465
+
466
+ // Check exclusions
467
+ const isExcluded = config.excludePatterns?.some((excludeFn) => excludeFn(filePath));
468
+
469
+ return (matchesRegex || matchesPath || matchesKeywords) && !isExcluded;
470
+ }
471
+
472
+ /**
473
+ * LLM selects final key files from search results with unified JSON parsing
474
+ */
475
+ async selectFinalKeyFiles(candidates, projectPath) {
476
+ if (candidates.length === 0) {
477
+ console.log(chalk.yellow('⚠️ No candidates found from embeddings search'));
478
+ return [];
479
+ }
480
+
481
+ console.log(chalk.cyan(`🤖 LLM analyzing ${candidates.length} candidates...`));
482
+
483
+ const candidatesSummary = candidates
484
+ .map((file, index) => {
485
+ const snippet = file.content.substring(0, 150).replace(/\s+/g, ' ').trim();
486
+ return `${index + 1}. ${file.path} (${file.category}): ${snippet}...`;
487
+ })
488
+ .join('\n');
489
+
490
+ const prompt = `Analyze these ${candidates.length} file candidates and select the most architecturally important files (15-20 maximum).
491
+
492
+ Project: ${path.basename(projectPath)}
493
+
494
+ Files found by embeddings search:
495
+ ${candidatesSummary}
496
+
497
+ Select files that best reveal the project's architecture:
498
+ - Framework setup & key configurations
499
+ - Custom utilities, hooks, and wrappers
500
+ - API/data layer patterns and GraphQL setup
501
+ - Type definitions & core interfaces
502
+ - Entry points, routing, and main structure
503
+ - State management and data flow patterns
504
+
505
+ IMPORTANT: Return ONLY a JSON array of file paths, nothing else:
506
+ ["path1", "path2", "path3"]
507
+
508
+ Select files that define HOW this project works, especially custom implementations.`;
509
+
510
+ try {
511
+ const fileSelectionSchema = {
512
+ type: 'object',
513
+ additionalProperties: false,
514
+ properties: {
515
+ selectedFiles: {
516
+ type: 'array',
517
+ items: {
518
+ type: 'string',
519
+ },
520
+ description: 'Array of file paths selected as architecturally important',
521
+ },
522
+ },
523
+ required: ['selectedFiles'],
524
+ };
525
+
526
+ const response = await this.llm.sendPromptToClaude(prompt, {
527
+ temperature: 0.1,
528
+ maxTokens: 1000,
529
+ jsonSchema: fileSelectionSchema,
530
+ });
531
+
532
+ console.log(chalk.gray(' 📄 LLM Response preview:'), response.content.substring(0, 200));
533
+
534
+ const selectedPaths = response.json.selectedFiles;
535
+
536
+ if (selectedPaths && Array.isArray(selectedPaths) && selectedPaths.length > 0) {
537
+ const keyFiles = selectedPaths
538
+ .map((filePath) => {
539
+ const candidate = candidates.find((f) => f.path === filePath);
540
+ if (candidate) {
541
+ const fullPath = path.join(projectPath, filePath);
542
+ if (fs.existsSync(fullPath)) {
543
+ const stats = fs.statSync(fullPath);
544
+ return {
545
+ relativePath: filePath,
546
+ fullPath,
547
+ category: candidate.category,
548
+ source: candidate.source,
549
+ size: stats.size,
550
+ lastModified: stats.mtime,
551
+ };
552
+ }
553
+ }
554
+ return null;
555
+ })
556
+ .filter(Boolean);
557
+
558
+ console.log(chalk.cyan(`🎯 LLM selected ${keyFiles.length} final key files`));
559
+ return keyFiles;
560
+ } else {
561
+ throw new Error(`Failed to extract valid JSON array from LLM response`);
562
+ }
563
+ } catch (error) {
564
+ console.error(chalk.red('Error in LLM selection:'), error.message);
565
+ console.log(chalk.yellow(' 🔄 Falling back to automatic selection...'));
566
+ return this.fallbackFileSelection(candidates, projectPath);
567
+ }
568
+ }
569
+
570
+ /**
571
+ * Enhanced fallback selection
572
+ */
573
+ fallbackFileSelection(candidates, projectPath) {
574
+ const fallbackFiles = [];
575
+ const categoryLimits = { package: 3, config: 6, setup: 4, utility: 4, types: 3, 'test-config': 2 };
576
+ const categoryCounts = {};
577
+
578
+ for (const candidate of candidates) {
579
+ const category = candidate.category;
580
+ const count = categoryCounts[category] || 0;
581
+ const limit = categoryLimits[category] || 2;
582
+
583
+ if (count < limit && fallbackFiles.length < 15) {
584
+ const fullPath = path.join(projectPath, candidate.path);
585
+ if (fs.existsSync(fullPath)) {
586
+ const stats = fs.statSync(fullPath);
587
+ fallbackFiles.push({
588
+ relativePath: candidate.path,
589
+ fullPath,
590
+ category: candidate.category,
591
+ source: candidate.source,
592
+ size: stats.size,
593
+ lastModified: stats.mtime,
594
+ });
595
+ categoryCounts[category] = count + 1;
596
+ }
597
+ }
598
+ }
599
+
600
+ console.log(chalk.yellow(`⚠️ Used fallback selection: ${fallbackFiles.length} files`));
601
+ return fallbackFiles;
602
+ }
603
+
604
+ /**
605
+ * Calculate hash of key files content to detect changes
606
+ */
607
+ async calculateKeyFilesHash(keyFiles) {
608
+ const hash = crypto.createHash('sha256');
609
+
610
+ for (const file of keyFiles) {
611
+ try {
612
+ const filePath = file.relativePath || file.path;
613
+ const fullPath = file.fullPath || path.join(process.cwd(), filePath);
614
+
615
+ hash.update(filePath);
616
+ if (file.lastModified) {
617
+ hash.update(file.lastModified.toISOString ? file.lastModified.toISOString() : file.lastModified);
618
+ }
619
+
620
+ // For small files, include content snippet
621
+ if (fs.existsSync(fullPath) && file.size < 50 * 1024) {
622
+ const content = fs.readFileSync(fullPath, 'utf8');
623
+ hash.update(content.substring(0, 1000));
624
+ }
625
+ } catch {
626
+ hash.update(file.relativePath || file.path || '');
627
+ }
628
+ }
629
+
630
+ return hash.digest('hex');
631
+ }
632
+
633
  /**
   * Generate comprehensive project summary using LLM analysis (SINGLE CALL).
   *
   * Feeds the (size-capped) contents of the key files to the LLM with a
   * schema-constrained prompt, validates the parsed result, and stamps it
   * with analysis metadata. Any failure falls back to createFallbackSummary().
   *
   * NOTE(review): the prompt's example JSON asks for keys like keyPatterns /
   * apiPatterns / stateManagement, while projectSummarySchema below declares
   * additionalProperties:false with different keys (architecture,
   * keyComponents). Which one wins depends on how llm.sendPromptToClaude
   * enforces jsonSchema — confirm the schema and prompt are meant to diverge.
   *
   * @param {Array<object>} keyFiles - Key-file descriptors to analyze.
   * @param {string} projectPath - Project root (recorded in the summary).
   * @returns {Promise<object>} Validated summary, or a fallback summary on error.
   */
  async generateProjectSummary(keyFiles, projectPath) {
    // Concatenated, size-capped contents of the key files (see extractFileContents).
    const fileContents = await this.extractFileContents(keyFiles);

    const prompt = `Analyze this project's architecture and provide a comprehensive summary. Here are the key files:

${fileContents}

Please analyze this project and provide a JSON response with:

{
"projectName": "Project name from package.json or inferred",
"projectType": "Type of project (web app, mobile app, library, etc.)",
"mainFrameworks": ["Primary frameworks/libraries used"],
"technologies": ["All technologies, languages, tools identified"],
"architecturalPatterns": ["Patterns like MVC, component-based, microservices, etc."],
"keyPatterns": [
{
"pattern": "Custom pattern name",
"description": "How this pattern is implemented",
"files": ["Relevant file paths"],
"usage": "When and how it's used"
}
],
"customImplementations": [
{
"name": "Custom feature/hook/utility name",
"description": "What it does and HOW it modifies standard library behavior",
"files": ["Files where it's defined"],
"properties": ["Key properties/methods it exposes, especially any that extend standard objects"],
"usage": "How it should be used",
"extendsStandard": "Which standard library/framework objects or APIs this modifies"
}
],
"apiPatterns": [
{
"type": "REST/GraphQL/etc",
"description": "How APIs are structured",
"patterns": ["URL patterns or query patterns"],
"authentication": "Auth method if evident"
}
],
"stateManagement": {
"approach": "Redux/Context/Zustand/etc or None",
"patterns": ["How state is organized"],
"files": ["Key state management files"]
},
"testingApproach": {
"frameworks": ["Testing frameworks used"],
"patterns": ["Testing patterns/conventions"],
"coverage": ["What types of tests are emphasized"]
},
"codeStyle": {
"conventions": ["Naming conventions, file organization, etc."],
"linting": ["ESLint rules or other style enforcement"],
"typescript": "Usage level if TypeScript project"
},
"deploymentInfo": {
"platform": "Deployment platform if evident",
"containerization": "Docker usage if present",
"buildProcess": "Build tool and process"
},
"reviewGuidelines": [
"Specific guidelines for code review based on this project's patterns",
"What to look for in PRs",
"Common patterns that should be maintained",
"Potential issues specific to this architecture"
]
}

Focus on identifying patterns that would help in code review, especially:
- Custom utilities or modules that extend standard frameworks and libraries
- **CRITICAL: Custom properties or methods added to standard library objects** (e.g., custom properties on database query results, API responses, or framework objects)
- **Extensions to library APIs** - any way this project modifies or enhances standard library behavior
- Specific ways APIs are called and results are handled (look for non-standard patterns)
- Data flow and processing patterns
- Module organization and code structure patterns
- Type definitions and interfaces that define contracts, especially those that extend standard types
- Configuration patterns and environment handling
- **Custom wrappers** around standard libraries that add functionality

**CRITICAL ANALYSIS REQUIRED**: Look specifically for code that:
1. **Takes standard library return values and adds custom properties** - For example:
- Functions that take query results and add success/loading/error properties
- Wrappers that enhance API responses with additional metadata
- Custom hooks that extend standard framework hooks with extra functionality
2. **Modifies or extends standard library interfaces** - Look for:
- TypeScript interfaces that extend standard types with additional fields
- Custom implementations that add methods to standard objects
- Wrapper classes that enhance standard library functionality
3. **Creates custom versions of standard patterns** - Such as:
- Custom error handling that adds properties to standard error objects
- Middleware that modifies standard request/response patterns
- Custom state management that extends standard patterns

**EXAMPLES TO RECOGNIZE**:
- If you see a function that takes a standard query result and returns an object with added success/error properties, identify this as a custom implementation
- If you see custom hooks that wrap standard library hooks and add properties, document these
- If you see type definitions that extend standard interfaces, note what properties they add

**OUTPUT REQUIREMENT**: For each custom implementation found, specifically identify what standard library object or pattern it extends in the "extendsStandard" field.

Be thorough but concise. This summary will be used to provide context during automated code reviews to prevent false positives about "non-standard" properties that are actually valid custom implementations in this project.`;

    try {
      // JSON schema passed to the LLM client to constrain the response shape.
      const projectSummarySchema = {
        type: 'object',
        additionalProperties: false,
        properties: {
          projectName: { type: 'string' },
          projectType: { type: 'string' },
          mainFrameworks: {
            type: 'array',
            items: { type: 'string' },
          },
          technologies: {
            type: 'array',
            items: { type: 'string' },
          },
          architecture: {
            type: 'object',
            properties: {
              pattern: { type: 'string' },
              description: { type: 'string' },
              layers: {
                type: 'array',
                items: { type: 'string' },
              },
            },
          },
          keyComponents: {
            type: 'array',
            items: {
              type: 'object',
              properties: {
                name: { type: 'string' },
                type: { type: 'string' },
                description: { type: 'string' },
                dependencies: {
                  type: 'array',
                  items: { type: 'string' },
                },
              },
              required: ['name', 'type', 'description'],
            },
          },
          customImplementations: {
            type: 'array',
            items: {
              type: 'object',
              properties: {
                name: { type: 'string' },
                description: { type: 'string' },
                extendsStandard: { type: 'string' },
                files: {
                  type: 'array',
                  items: { type: 'string' },
                },
              },
              required: ['name', 'description', 'extendsStandard'],
            },
          },
        },
        required: [
          'projectName',
          'projectType',
          'mainFrameworks',
          'technologies',
          'architecture',
          'keyComponents',
          'customImplementations',
        ],
      };

      // Single LLM call; low temperature for deterministic structure.
      const response = await this.llm.sendPromptToClaude(prompt, {
        temperature: 0.1,
        maxTokens: 4000,
        jsonSchema: projectSummarySchema,
      });

      const summary = response.json;
      if (summary) {
        // Validate and ensure required fields exist (Sonnet 4.5 compatibility)
        const validatedSummary = this.validateProjectSummary(summary);

        // Add metadata
        validatedSummary.analysisDate = new Date().toISOString();
        validatedSummary.projectPath = projectPath;
        validatedSummary.keyFilesCount = keyFiles.length;
        return validatedSummary;
      } else {
        console.error(chalk.red('Failed to parse LLM response as JSON'));
        console.error(chalk.gray('Response content preview:'), response.content.substring(0, 500));
        throw new Error('Failed to parse LLM response as JSON');
      }
    } catch (error) {
      console.error(chalk.red('Error generating project summary:'), error.message);
      const fallback = this.createFallbackSummary(projectPath, keyFiles);
      console.log(chalk.yellow('Using fallback summary with technologies:'), fallback.technologies);
      return fallback;
    }
  }
837
+
838
+ /**
839
+ * Extract and format file contents for LLM analysis
840
+ */
841
+ async extractFileContents(keyFiles) {
842
+ let content = '';
843
+ let totalSize = 0;
844
+ const maxTotalSize = 100 * 1024; // 100KB total
845
+
846
+ for (const file of keyFiles.slice(0, 25)) {
847
+ // Max 25 files
848
+ if (totalSize >= maxTotalSize) break;
849
+
850
+ try {
851
+ const fileContent = fs.readFileSync(file.fullPath, 'utf8');
852
+ const remainingSize = maxTotalSize - totalSize;
853
+ const contentToAdd = fileContent.substring(0, Math.min(fileContent.length, remainingSize));
854
+
855
+ content += `\n\n=== ${file.relativePath} (${file.category}) ===\n${contentToAdd}`;
856
+ totalSize += contentToAdd.length;
857
+ } catch (error) {
858
+ content += `\n\n=== ${file.relativePath} (${file.category}) ===\n[Could not read file: ${error.message}]`;
859
+ }
860
+ }
861
+
862
+ return content;
863
+ }
864
+
865
+ /**
866
+ * Validate and ensure project summary has all required fields (Sonnet 4.5 compatibility)
867
+ */
868
+ validateProjectSummary(summary) {
869
+ // Ensure all required fields exist with safe defaults
870
+ const validatedSummary = {
871
+ projectName: summary.projectName || 'Unknown Project',
872
+ projectType: summary.projectType || 'Unknown',
873
+ mainFrameworks: Array.isArray(summary.mainFrameworks) ? summary.mainFrameworks : [],
874
+ technologies: Array.isArray(summary.technologies) ? summary.technologies : [],
875
+ architecturalPatterns: Array.isArray(summary.architecturalPatterns) ? summary.architecturalPatterns : [],
876
+ keyPatterns: Array.isArray(summary.keyPatterns) ? summary.keyPatterns : [],
877
+ customImplementations: Array.isArray(summary.customImplementations) ? summary.customImplementations : [],
878
+ apiPatterns: Array.isArray(summary.apiPatterns) ? summary.apiPatterns : [],
879
+ stateManagement: summary.stateManagement || {
880
+ approach: 'Unknown',
881
+ patterns: [],
882
+ },
883
+ reviewGuidelines: Array.isArray(summary.reviewGuidelines) ? summary.reviewGuidelines : [],
884
+ // Preserve any other fields that might exist
885
+ ...summary,
886
+ };
887
+
888
+ // Ensure stateManagement has required structure
889
+ if (validatedSummary.stateManagement && typeof validatedSummary.stateManagement === 'object') {
890
+ validatedSummary.stateManagement = {
891
+ approach: validatedSummary.stateManagement.approach || 'Unknown',
892
+ patterns: Array.isArray(validatedSummary.stateManagement.patterns) ? validatedSummary.stateManagement.patterns : [],
893
+ ...validatedSummary.stateManagement,
894
+ };
895
+ }
896
+
897
+ console.log(
898
+ chalk.cyan(
899
+ `✅ Project summary validated - Technologies: ${validatedSummary.technologies.length}, Frameworks: ${validatedSummary.mainFrameworks.length}`
900
+ )
901
+ );
902
+
903
+ return validatedSummary;
904
+ }
905
+
906
+ /**
907
+ * Create a basic fallback summary when LLM analysis fails
908
+ */
909
+ createFallbackSummary(projectPath, keyFiles = []) {
910
+ const packageJsonPath = path.join(projectPath, 'package.json');
911
+ let projectName = path.basename(projectPath);
912
+ let technologies = [];
913
+
914
+ if (fs.existsSync(packageJsonPath)) {
915
+ try {
916
+ const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8'));
917
+ projectName = packageJson.name || projectName;
918
+ const deps = { ...packageJson.dependencies, ...packageJson.devDependencies };
919
+ technologies = Object.keys(deps).slice(0, 10);
920
+ } catch {
921
+ // Continue with defaults
922
+ }
923
+ }
924
+
925
+ return {
926
+ projectName,
927
+ projectType: 'Unknown',
928
+ mainFrameworks: [],
929
+ technologies,
930
+ architecturalPatterns: [],
931
+ keyPatterns: [],
932
+ customImplementations: [],
933
+ apiPatterns: [],
934
+ stateManagement: { approach: 'Unknown', patterns: [], files: [] },
935
+ testingApproach: { frameworks: [], patterns: [], coverage: [] },
936
+ codeStyle: { conventions: [], linting: [], typescript: 'Unknown' },
937
+ deploymentInfo: { platform: 'Unknown', containerization: false, buildProcess: 'Unknown' },
938
+ reviewGuidelines: [
939
+ 'Follow established patterns in the codebase',
940
+ 'Maintain consistency with existing code style',
941
+ 'Ensure proper error handling',
942
+ 'Add appropriate tests for new functionality',
943
+ ],
944
+ analysisDate: new Date().toISOString(),
945
+ projectPath,
946
+ keyFilesCount: keyFiles.length,
947
+ keyFiles: keyFiles.map((f) => ({
948
+ path: f.relativePath,
949
+ category: f.category,
950
+ lastModified: f.lastModified?.toISOString() || new Date().toISOString(),
951
+ })),
952
+ fallback: true,
953
+ };
954
+ }
955
+ }