codecritique 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +1145 -0
  3. package/package.json +98 -0
  4. package/src/content-retrieval.js +747 -0
  5. package/src/custom-documents.js +597 -0
  6. package/src/embeddings/cache-manager.js +364 -0
  7. package/src/embeddings/constants.js +40 -0
  8. package/src/embeddings/database.js +921 -0
  9. package/src/embeddings/errors.js +208 -0
  10. package/src/embeddings/factory.js +447 -0
  11. package/src/embeddings/file-processor.js +851 -0
  12. package/src/embeddings/model-manager.js +337 -0
  13. package/src/embeddings/similarity-calculator.js +97 -0
  14. package/src/embeddings/types.js +113 -0
  15. package/src/feedback-loader.js +384 -0
  16. package/src/index.js +1418 -0
  17. package/src/llm.js +123 -0
  18. package/src/pr-history/analyzer.js +579 -0
  19. package/src/pr-history/bot-detector.js +123 -0
  20. package/src/pr-history/cli-utils.js +204 -0
  21. package/src/pr-history/comment-processor.js +549 -0
  22. package/src/pr-history/database.js +819 -0
  23. package/src/pr-history/github-client.js +629 -0
  24. package/src/project-analyzer.js +955 -0
  25. package/src/rag-analyzer.js +2764 -0
  26. package/src/rag-review.js +566 -0
  27. package/src/technology-keywords.json +753 -0
  28. package/src/utils/command.js +48 -0
  29. package/src/utils/constants.js +263 -0
  30. package/src/utils/context-inference.js +364 -0
  31. package/src/utils/document-detection.js +105 -0
  32. package/src/utils/file-validation.js +271 -0
  33. package/src/utils/git.js +232 -0
  34. package/src/utils/language-detection.js +170 -0
  35. package/src/utils/logging.js +24 -0
  36. package/src/utils/markdown.js +132 -0
  37. package/src/utils/mobilebert-tokenizer.js +141 -0
  38. package/src/utils/pr-chunking.js +276 -0
  39. package/src/utils/string-utils.js +28 -0
  40. package/src/zero-shot-classifier-open.js +392 -0
package/src/index.js ADDED
@@ -0,0 +1,1418 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * AI Code Review Tool - Command Line Interface
5
+ *
6
+ * Main entry point for the AI code review tool using the RAG approach.
7
+ */
8
+
9
+ import { execSync } from 'child_process';
10
+ import fs from 'fs';
11
+ import { readFileSync } from 'fs';
12
+ import path from 'path';
13
+ import readline from 'readline';
14
+ import chalk from 'chalk';
15
+ import { Spinner } from 'cli-spinner';
16
+ import { program } from 'commander';
17
+ import { glob } from 'glob';
18
+ import { getDefaultEmbeddingsSystem } from './embeddings/factory.js';
19
+ import { PRHistoryAnalyzer } from './pr-history/analyzer.js';
20
+ import {
21
+ displayAnalysisResults,
22
+ displayDatabaseStats,
23
+ displayProgress,
24
+ displayStatus,
25
+ getRepositoryAndProjectPath,
26
+ validateGitHubToken,
27
+ } from './pr-history/cli-utils.js';
28
+ import { cleanupClassifier, clearPRComments, getPRCommentsStats, hasPRComments } from './pr-history/database.js';
29
+ import { ProjectAnalyzer } from './project-analyzer.js';
30
+ import { reviewFile, reviewFiles, reviewPullRequest } from './rag-review.js';
31
+ import { execGitSafe } from './utils/command.js';
32
+ import { ensureBranchExists, findBaseBranch } from './utils/git.js';
33
+
// Embeddings system instance shared by the command handlers and shutdown hooks in this module.
const embeddingsSystem = getDefaultEmbeddingsSystem();

// Load the package manifest, resolved relative to this module, so the CLI can report its version.
const packageManifestUrl = new URL('../package.json', import.meta.url);
const packageJson = JSON.parse(readFileSync(packageManifestUrl, 'utf8'));

// Top-level CLI metadata: program name, description and the value printed by the version flag.
program.name('codecritique');
program.description('CLI tool for AI-powered code review using the RAG approach');
program.version(packageJson.version);
41
+
// `analyze` command: reviews a single file, a list of files/globs, or the files changed
// compared to a branch. The handler is runCodeReview.
//
// NOTE: Commander calls a custom option parser as `parser(value, previousValue)`, where
// `previousValue` starts out as the option's default. Passing `parseInt` directly would
// therefore use the default as the radix (e.g. `parseInt('4096', 8192)` -> NaN,
// `parseInt('10', 5)` -> 5), so integer options go through an explicit base-10 wrapper.
// `parseFloat` ignores its second argument and is safe to pass as-is.
program
  .command('analyze')
  .description('Analyze code using dynamic context (RAG approach)')
  .option('-b, --diff-with <branch>', 'Analyze files changed compared to a branch (triggers PR review mode)')
  .option('-f, --files <files...>', 'Specific files or glob patterns to review')
  .option('--file <file>', 'Analyze a single file')
  .option('-d, --directory <dir>', 'Working directory for git operations (use with --diff-with)')
  .option('-o, --output <format>', 'Output format (text, json, markdown)', 'text')
  .option('--output-file <file>', 'Save output to file (useful with --output json)')
  .option('--no-color', 'Disable colored output')
  .option('--verbose', 'Show verbose output')
  .option('--model <model>', 'LLM model to use (e.g., claude-sonnet-4-5)')
  .option('--temperature <number>', 'LLM temperature', parseFloat, 0.2)
  .option('--max-tokens <number>', 'LLM max tokens', (value) => Number.parseInt(value, 10), 8192)
  .option('--similarity-threshold <number>', 'Threshold for finding similar code examples', parseFloat, 0.6)
  .option('--max-examples <number>', 'Max similar code examples to use', (value) => Number.parseInt(value, 10), 5)
  .option('--concurrency <number>', 'Concurrency for processing multiple files', (value) => Number.parseInt(value, 10), 3)
  .option(
    '--doc <specs...>',
    'A document to provide custom instructions to the LLM (e.g., "Engineering Guidelines:./docs/guidelines.md"). Can be specified multiple times.'
  )
  .option('--feedback-path <path>', 'Path to feedback artifacts directory for filtering dismissed issues')
  .option('--track-feedback', 'Enable feedback-aware analysis to avoid previously dismissed issues')
  .option('--feedback-threshold <number>', 'Similarity threshold for feedback filtering (0-1)', parseFloat, 0.7)
  .action(runCodeReview);
68
+
// --- Embeddings commands ---

// `embeddings:generate`: --concurrency and --max-lines are declared with string defaults;
// the generateEmbeddings handler converts them to integers itself.
{
  const generateCommand = program.command('embeddings:generate');
  generateCommand.description('Generate embeddings for the codebase');
  generateCommand.option('-d, --directory <dir>', 'Directory to process', '.');
  generateCommand.option('-f, --files <files...>', 'Specific files or patterns to process');
  generateCommand.option('-c, --concurrency <number>', 'Number of concurrent embedding requests', '10');
  generateCommand.option(
    '--max-lines <number>',
    'Maximum lines per code file for embeddings (documentation files are not truncated, default: 1000)',
    '1000'
  );
  generateCommand.option('--verbose', 'Show verbose output');
  generateCommand.option('--exclude <patterns...>', 'Patterns to exclude (e.g., "**/*.test.js" "docs/**")');
  generateCommand.option('--exclude-file <file>', 'File containing patterns to exclude (one per line)');
  generateCommand.option('--no-gitignore', 'Disable automatic exclusion of files in .gitignore');
  generateCommand.option('--force-analysis', 'Force regeneration of project architecture analysis (bypass cache)');
  generateCommand.action(generateEmbeddings);
}

// `embeddings:clear`: removes the embeddings stored for one project directory.
{
  const clearCommand = program.command('embeddings:clear');
  clearCommand.description('Clear stored embeddings for the current project');
  clearCommand.option('-d, --directory <dir>', 'Directory of the project to clear embeddings for', '.');
  clearCommand.action(clearEmbeddings);
}

// `embeddings:clear-all`: removes the embeddings of every project, then releases the database.
{
  // Best-effort database cleanup after a failed clear; a cleanup failure is reported
  // but does not replace the original error or change the exit status.
  const cleanupAfterFailure = async () => {
    try {
      await embeddingsSystem.databaseManager.cleanup();
    } catch (cleanupErr) {
      console.error(chalk.red('Error during cleanup:'), cleanupErr.message);
    }
  };

  const clearAllCommand = program.command('embeddings:clear-all');
  clearAllCommand.description('Clear ALL stored embeddings (affects all projects - use with caution)');
  clearAllCommand.action(async () => {
    try {
      console.log(chalk.red('WARNING: This will clear embeddings for ALL projects on this machine!'));
      console.log(chalk.cyan('Clearing all embeddings...'));
      await embeddingsSystem.clearAllEmbeddings();
      console.log(chalk.green('All embeddings have been cleared.'));
      await embeddingsSystem.databaseManager.cleanup();
    } catch (err) {
      console.error(chalk.red('Error clearing all embeddings:'), err.message);
      await cleanupAfterFailure();
      process.exit(1);
    }
  });
}

// `embeddings:stats`: reports on stored embeddings, optionally limited to one project directory.
{
  const statsCommand = program.command('embeddings:stats');
  statsCommand.description('Show statistics about stored embeddings');
  statsCommand.option('-d, --directory <dir>', 'Directory of the project to show stats for (shows all projects if not specified)');
  statsCommand.action(showEmbeddingStats);
}
120
+
// --- PR history analysis commands ---
//
// NOTE: Commander calls a custom option parser as `parser(value, previousValue)`, where
// `previousValue` starts out as the option's default. Passing `parseInt` directly would
// use that default as the radix (`--concurrency 4` -> `parseInt('4', 2)` -> NaN,
// `--batch-size 100` -> `parseInt('100', 50)` -> NaN), so integer options are parsed
// with an explicit base-10 wrapper instead.
program
  .command('pr-history:analyze')
  .description('Analyze PR comment history for the current project or specified repository')
  .option('-d, --directory <dir>', 'Project directory to analyze (auto-detects GitHub repo)', '.')
  .option('-r, --repository <repo>', 'GitHub repository in format "owner/repo" (overrides auto-detection)')
  .option('-t, --token <token>', 'GitHub API token (or set GITHUB_TOKEN env var)')
  .option('--since <date>', 'Only analyze PRs since this date (ISO format)')
  .option('--until <date>', 'Only analyze PRs until this date (ISO format)')
  .option('--limit <number>', 'Limit number of PRs to analyze', (value) => Number.parseInt(value, 10))
  .option('--resume', 'Resume interrupted analysis')
  .option('--clear', 'Clear existing data before analysis')
  .option('--concurrency <number>', 'Number of concurrent requests', (value) => Number.parseInt(value, 10), 2)
  .option('--batch-size <number>', 'Batch size for processing', (value) => Number.parseInt(value, 10), 50)
  .option('--verbose', 'Show verbose output')
  .action(analyzePRHistory);

program
  .command('pr-history:status')
  .description('Check PR analysis status for the current project or specified repository')
  .option('-d, --directory <dir>', 'Project directory to check status for', '.')
  .option('-r, --repository <repo>', 'GitHub repository in format "owner/repo" (overrides auto-detection)')
  .action(getPRHistoryStatus);

program
  .command('pr-history:clear')
  .description('Clear PR analysis data for the current project or specified repository')
  .option('-d, --directory <dir>', 'Project directory to clear data for', '.')
  .option('-r, --repository <repo>', 'GitHub repository in format "owner/repo" (overrides auto-detection)')
  .option('--force', 'Skip confirmation prompts')
  .action(clearPRHistory);
152
+
153
+ // Print usage examples (analyze, embeddings:* and pr-history:* commands) when Commander emits its '--help' event
154
+ program.on('--help', () => {
155
+ console.log(`
156
+ Examples:
157
+ $ codecritique analyze --directory src/components
158
+ $ codecritique analyze --file src/utils/validation.ts
159
+ $ codecritique analyze --files "src/**/*.tsx" "lib/*.js"
160
+ $ codecritique analyze -b main
161
+ $ codecritique analyze --doc "Our Eng Guidelines:./ENGINEERING_GUIDELINES.md" --file src/utils/validation.ts
162
+ $ codecritique analyze --diff-with feature-branch -d /path/to/repo
163
+ $ codecritique analyze --output json > review-results.json
164
+ $ codecritique analyze --track-feedback --feedback-path ./feedback-artifacts --file src/utils/validation.ts
165
+ $ codecritique analyze --track-feedback --feedback-threshold 0.8 -b main
166
+ $ codecritique embeddings:generate --directory src
167
+ $ codecritique embeddings:generate --exclude "**/*.test.js" "**/*.spec.js"
168
+ $ codecritique embeddings:generate --exclude-file .embedignore
169
+ $ codecritique embeddings:generate --no-gitignore
170
+ $ codecritique embeddings:generate --force-analysis
171
+ $ codecritique embeddings:stats
172
+ $ codecritique embeddings:stats --directory /path/to/project
173
+ $ codecritique embeddings:clear
174
+ $ codecritique embeddings:clear --directory /path/to/project
175
+ $ codecritique embeddings:clear-all
176
+ $ codecritique pr-history:analyze
177
+ $ codecritique pr-history:analyze --repository owner/repo --token ghp_xxx
178
+ $ codecritique pr-history:analyze --directory /path/to/project --since 2024-01-01
179
+ $ codecritique pr-history:status
180
+ $ codecritique pr-history:status --repository owner/repo
181
+ $ codecritique pr-history:clear
182
+ $ codecritique pr-history:clear --repository owner/repo --force
183
+ `);
184
+ });
185
+
// Backward compatibility with the old command format, where `analyze` was implied:
// `codecritique --file foo.js` is rewritten to `codecritique analyze --file foo.js`.
const KNOWN_COMMAND_NAMES = new Set([
  'analyze',
  'embeddings:generate',
  'embeddings:clear',
  'embeddings:clear-all',
  'embeddings:stats',
  'pr-history:analyze',
  'pr-history:status',
  'pr-history:clear',
]);

// Requests addressed to the top-level program. These must NOT be prefixed with `analyze`:
// doing so turns `--version` into an unknown option of the subcommand and replaces the
// top-level help (which carries the usage examples) with the `analyze` help.
const ROOT_LEVEL_ARGS = new Set(['-h', '--help', '-V', '--version', 'help']);

const cliArgs = process.argv.slice(2);
const hasCommand = cliArgs.some((arg) => KNOWN_COMMAND_NAMES.has(arg));
const isRootLevelRequest = ROOT_LEVEL_ARGS.has(cliArgs[0]);

if (!hasCommand && !isRootLevelRequest && cliArgs.length > 0) {
  // No command was named but arguments were given: default to 'analyze'
  program.parse(['node', 'index.js', 'analyze', ...cliArgs]);
} else {
  program.parse();
}
207
+
/**
 * Shared handler for termination signals (embeddings cleanup primarily).
 *
 * Runs embeddingsSystem.cleanup() and exits with status 0 on success or 1 when the cleanup
 * rejects. A 10 second watchdog forces exit with status 1 if the cleanup never settles.
 *
 * @param {string} signal - Name of the received signal, used only in the log messages
 * @returns {Promise<void>}
 */
const handleShutdownSignal = async (signal) => {
  console.log(chalk.yellow(`\nReceived ${signal}. Attempting graceful shutdown...`));

  // Watchdog: force the process down if cleanup hangs
  const forceExitTimeout = setTimeout(() => {
    console.error(chalk.red('Cleanup timed out after 10 seconds. Forcing exit...'));
    process.exit(1);
  }, 10000);

  try {
    console.log(chalk.cyan(`${signal} handler: Attempting embeddingsSystem.cleanup()...`));
    await embeddingsSystem.cleanup();
    console.log(chalk.green('embeddingsSystem.cleanup() completed.'));
    clearTimeout(forceExitTimeout); // Cleanup finished, the watchdog is no longer needed
    console.log(chalk.cyan(`${signal} handler: Exiting normally (code 0).`));
    process.exit(0);
  } catch (err) {
    console.error(chalk.red('Error during embeddingsSystem.cleanup():'), err.message);
    clearTimeout(forceExitTimeout);
    console.log(chalk.cyan(`${signal} handler: Exiting after error (code 1).`));
    process.exit(1);
  }
};

// SIGINT and SIGTERM share the same shutdown sequence; only the logged signal name differs.
for (const signal of ['SIGINT', 'SIGTERM']) {
  process.on(signal, () => handleShutdownSignal(signal));
}

// Log on every process exit. No cleanup is performed here: 'exit' listeners run
// synchronously, so asynchronous cleanup could not complete from this handler.
process.on('exit', () => {
  console.log(chalk.cyan('Exiting...'));
});
260
+
261
+ // REMOVED: Old options processing logic for ignore/severity
262
+
263
+ // REMOVED: Old LLM import
264
+ // import * as llm from './llm.js';
265
+
/**
 * Normalize the text of a custom instruction document for LLM consumption.
 *
 * Unifies line endings, collapses runs of blank lines, joins lines that appear to be
 * fragments of one sentence or list item, and squeezes repeated spaces.
 *
 * @param {string} rawContent - Document text as read from disk
 * @returns {string} Cleaned, trimmed text
 */
function normalizeCustomDocContent(rawContent) {
  return (
    rawContent
      .replace(/\r\n/g, '\n') // Normalize line endings
      .replace(/\r/g, '\n') // Handle old Mac line endings
      .replace(/\n\s*\n\s*\n/g, '\n\n') // Remove excessive blank lines
      // Join broken sentences and fragmented content
      .replace(/([a-z,;:])\n([a-z])/g, '$1 $2') // Lowercase-to-lowercase continuation
      .replace(/([a-zA-Z])\n([a-zA-Z])/g, '$1 $2') // Any letter-to-letter line break
      // Numbered/lettered list markers separated from their text
      .replace(/(\d+\.)\n\s*([A-Za-z])/g, '$1 $2')
      .replace(/([a-z]\.)\n\s*([A-Za-z])/g, '$1 $2')
      // Bullet markers separated from their text
      .replace(/(-|\*|\+)\n\s*([A-Za-z])/g, '$1 $2')
      // Continuation lines that start with leading whitespace
      .replace(/([a-z])\n\s+([a-z])/g, '$1 $2')
      // Clean up whitespace created by the joins above
      .replace(/ {2,}/g, ' ')
      .replace(/\n\s*\n\s*\n/g, '\n\n')
      .trim()
  );
}

/**
 * Load the documents given through `--doc "title:path/to/file.md"`.
 *
 * The first ':' separates the title from the path. Exits the process with status 1 when a
 * spec has no ':' or when the referenced file does not exist.
 *
 * @param {string[]|undefined} docSpecs - Raw `--doc` values (may be undefined)
 * @returns {{title: string, content: string}[]} Loaded documents with normalized content
 */
function loadCustomDocs(docSpecs) {
  const customDocs = [];
  const previewLength = 200;

  for (const docSpec of docSpecs || []) {
    const separatorIndex = docSpec.indexOf(':');
    if (separatorIndex === -1) {
      console.error(chalk.red(`Invalid --doc format: "${docSpec}". Expected "title:path/to/file.md"`));
      process.exit(1);
    }
    const title = docSpec.substring(0, separatorIndex).trim();
    const filePath = docSpec.substring(separatorIndex + 1).trim();

    if (!fs.existsSync(filePath)) {
      console.error(chalk.red(`Document file not found: ${filePath}`));
      process.exit(1);
    }

    const content = normalizeCustomDocContent(fs.readFileSync(filePath, 'utf-8'));
    customDocs.push({ title, content });
    console.log(chalk.gray(`Loaded custom document '${title}' from ${filePath} (${content.length} chars)`));

    // Debug: show a single-line preview of the cleaned content
    const preview = content.substring(0, previewLength).replace(/\n/g, '\\n');
    console.log(chalk.gray(` Content preview: ${preview}${content.length > previewLength ? '...' : ''}`));
  }

  return customDocs;
}

/**
 * Release the resources held by a review run: the embeddings system, then the classifier.
 * Rejects if either cleanup step fails; callers decide how to report that.
 *
 * @returns {Promise<void>}
 */
async function releaseReviewResources() {
  await embeddingsSystem.cleanup();
  await cleanupClassifier();
  console.log(chalk.green('All resources cleaned up successfully'));
}

/**
 * Main handler of the `analyze` command (delegates the review itself to rag-review.js).
 *
 * Picks one review mode, in priority order: `--diff-with` (PR review of changed files),
 * `--file` (single file), `--files` (files/glob patterns). It then prints the results in
 * the requested output format and releases resources.
 *
 * Exit status: the process exits with 1 on invalid options, on thrown errors and on cleanup
 * failure. A review that completes but reports failure sets `process.exitCode = 1`, so
 * scripts and CI can detect it (previously this case ended with status 0).
 *
 * @param {Object} options - Options parsed by Commander for the `analyze` command
 * @returns {Promise<void>}
 */
async function runCodeReview(options) {
  let reviewTask = null;
  let operationDescription = '';
  const startTime = Date.now();

  // Project directory for embedding searches and git operations:
  // --directory when given, otherwise the current working directory
  const projectPath = options.directory ? path.resolve(options.directory) : process.cwd();
  console.log(chalk.gray(`Using project path for analysis: ${projectPath}`));

  const customDocs = loadCustomDocs(options.doc);

  // Consolidate review options to pass down
  const reviewOptions = {
    verbose: options.verbose,
    model: options.model,
    temperature: options.temperature,
    maxTokens: options.maxTokens,
    similarityThreshold: options.similarityThreshold,
    maxExamples: options.maxExamples,
    concurrency: options.concurrency,
    projectPath,
    directory: options.directory,
    customDocs,
    // Feedback options
    feedbackPath: options.feedbackPath,
    trackFeedback: options.trackFeedback,
    feedbackThreshold: options.feedbackThreshold,
  };

  try {
    console.log(chalk.bold.blue('AI Code Review (RAG Approach) - Starting analysis...'));

    // Only three modes are supported: diff with branch, single file, or specific files
    if (options.diffWith) {
      // projectPath doubles as the working directory for git commands
      const changedFiles = getChangedFiles(options.diffWith, projectPath);
      if (changedFiles.length === 0) {
        console.log(chalk.yellow(`No changed files found compared to branch '${options.diffWith}'. Exiting.`));
        return;
      }
      operationDescription = `${changedFiles.length} files changed vs ${options.diffWith}`;
      reviewTask = reviewPullRequest(changedFiles, {
        ...reviewOptions,
        actualBranch: options.diffWith,
        diffWith: options.diffWith,
      });
    } else if (options.file) {
      operationDescription = `single file: ${options.file}`;
      if (!fs.existsSync(options.file)) {
        throw new Error(`File not found: ${options.file}`);
      }
      reviewTask = reviewFile(options.file, reviewOptions);
    } else if (options.files && options.files.length > 0) {
      const filesToAnalyze = await expandFilePatterns(options.files);
      if (filesToAnalyze.length === 0) {
        console.log(chalk.yellow('No files found matching the specified patterns. Exiting.'));
        return;
      }
      operationDescription = `${filesToAnalyze.length} specific files/patterns`;
      reviewTask = reviewFiles(filesToAnalyze, reviewOptions);
    } else {
      // No review mode selected: explain the choices and exit
      console.error(chalk.red('Error: You must specify one of the following:'));
      console.error(chalk.yellow(' --file <file> Analyze a single file'));
      console.error(chalk.yellow(' --files <files...> Analyze specific files or glob patterns'));
      console.error(chalk.yellow(' -b, --diff-with <branch> Analyze files changed in a branch'));
      console.error(chalk.gray('\nOptional:'));
      console.error(chalk.gray(' -d, --directory <dir> Working directory (for git operations with --diff-with)'));
      console.error(chalk.gray('\nExamples:'));
      console.error(chalk.gray(' codecritique analyze --file src/component.tsx'));
      console.error(chalk.gray(' codecritique analyze --files "src/**/*.ts"'));
      console.error(chalk.gray(' codecritique analyze -b feature-branch'));
      console.error(
        chalk.gray(' codecritique analyze --doc "Our Eng Guidelines:./ENGINEERING_GUIDELINES.md" --file src/utils/validation.ts')
      );
      console.error(chalk.gray(' codecritique analyze --diff-with feature-branch -d /path/to/repo'));
      process.exit(1);
    }

    console.log(chalk.cyan(`Starting review for ${operationDescription}...`));

    // Execute the selected review task
    const reviewResult = await reviewTask;

    const duration = ((Date.now() - startTime) / 1000).toFixed(2);
    if (options.verbose) {
      console.log(chalk.blue(`Review process took ${duration} seconds.`));
    }

    // Process and output results
    if (reviewResult && reviewResult.success) {
      if (reviewResult.results && reviewResult.results.length > 0) {
        console.log(chalk.green(`Found ${reviewResult.results.length} result items to display`));
        // Any format other than 'json' or 'markdown' falls back to text output
        const outputFn = options.output === 'json' ? outputJson : options.output === 'markdown' ? outputMarkdown : outputText;
        outputFn(reviewResult.results, options);
        console.log(chalk.bold.green(`\nAnalysis complete for ${operationDescription}! (${duration}s)`));
      } else {
        // Successful run without displayable results: dump the result shape to aid debugging
        console.log(chalk.yellow('No results to display. Review result structure:'));
        console.log(chalk.yellow('reviewResult.results exists?'), reviewResult.results ? 'Yes' : 'No');
        if (reviewResult.results) {
          console.log(chalk.yellow('reviewResult.results type:'), typeof reviewResult.results);
          console.log(chalk.yellow('reviewResult.results is array?'), Array.isArray(reviewResult.results));
          if (!Array.isArray(reviewResult.results)) {
            console.log(
              chalk.yellow('reviewResult.results content:'),
              JSON.stringify(reviewResult.results, null, 2).substring(0, 500) + '...'
            );
          }
        }
        console.log(chalk.yellow(reviewResult.message || 'Review completed, but no results to display.'));
      }
    } else {
      console.error(chalk.red('\nCode review process failed.'));
      if (reviewResult && reviewResult.error) {
        console.error(chalk.red(`Error: ${reviewResult.error}`));
      }
      // Report the failure through the exit status while still allowing cleanup below to run
      process.exitCode = 1;
    }

    // Clean up resources
    console.log(chalk.cyan('Cleaning up resources...'));
    try {
      await releaseReviewResources();
    } catch (cleanupErr) {
      console.error(chalk.yellow('Error during cleanup:'), cleanupErr.message);
      process.exit(1);
    }
  } catch (err) {
    console.error(chalk.red(`\nError during code review (${operationDescription}):`), err.message);
    console.error(err.stack);
    // Clean up resources even on error; a cleanup failure must not hide the original error
    try {
      await releaseReviewResources();
    } catch (cleanupErr) {
      console.error(chalk.red('Error during cleanup:'), cleanupErr.message);
    }
    process.exit(1);
  }
}
466
+
467
+ // --- Embeddings commands remain largely unchanged --- //
468
+
/**
 * Generate embeddings for the codebase, then build and store the project architecture summary.
 *
 * Steps: collect exclusion patterns (--exclude plus --exclude-file), discover the files to
 * process (--files patterns or a directory scan), run batch embedding generation with a live
 * progress spinner, analyze the project, and finally release the embeddings system.
 *
 * The spinners and the progress timer are always stopped, even when discovery or embedding
 * generation throws, so error and cleanup messages are not overwritten by spinner redraws.
 * On any error the process exits with status 1 after a best-effort cleanup.
 *
 * @param {Object} options - Command options parsed by Commander for `embeddings:generate`
 * @returns {Promise<void>}
 */
async function generateEmbeddings(options) {
  try {
    console.log(chalk.bold.blue('AI Code Review - Generating embeddings...'));
    const startTime = Date.now();

    // Project directory (for project separation) and base directory (for file processing):
    // --directory when given, otherwise the current working directory
    const projectDir = options.directory ? path.resolve(options.directory) : process.cwd();
    const baseDir = path.resolve(options.directory || '.');

    console.log(chalk.cyan(`Project directory for embeddings: ${projectDir}`));
    console.log(chalk.cyan(`Base directory for file processing: ${baseDir}`));

    // Process exclusion patterns BEFORE file discovery
    console.log(chalk.cyan('Processing exclusion patterns...'));
    let excludePatterns = options.exclude || [];

    if (options.excludeFile) {
      const excludeFilePath = path.resolve(options.excludeFile);
      if (fs.existsSync(excludeFilePath)) {
        console.log(chalk.cyan(`Loading exclusion patterns from: ${excludeFilePath}`));
        // One pattern per line; blank lines and '#' comment lines are ignored
        const filePatterns = fs
          .readFileSync(excludeFilePath, 'utf8')
          .split('\n')
          .map((line) => line.trim())
          .filter((line) => line && !line.startsWith('#'));
        excludePatterns = [...excludePatterns, ...filePatterns];
      } else {
        // A missing exclude file is only a warning; generation continues without it
        console.warn(chalk.yellow(`Exclude file not found: ${excludeFilePath}`));
      }
    }

    if (excludePatterns.length > 0) {
      console.log(chalk.cyan(`Using ${excludePatterns.length} exclusion patterns.`));
    }

    // Commander sets `gitignore` to false when --no-gitignore is passed
    const respectGitignore = options.gitignore !== false;
    if (respectGitignore) {
      console.log(chalk.cyan('Respecting .gitignore patterns (if present).'));
    } else {
      console.log(chalk.yellow('Automatic .gitignore exclusion is disabled.'));
    }
    console.log(chalk.green('Exclusion pattern processing complete.'));

    // Get files to process
    let filesToProcess = [];

    if (options.files && options.files.length > 0) {
      console.log(chalk.cyan('Processing specified files/patterns...'));
      filesToProcess = await expandFilePatterns(options.files, baseDir);
      console.log(chalk.green(`Expanded specified files/patterns to ${filesToProcess.length} files.`));
    } else {
      console.log(chalk.cyan(`Scanning directory for supported files: ${baseDir}`));
      const scanSpinner = new Spinner('Scanning files... %s');
      scanSpinner.setSpinnerString('|/-\\');
      scanSpinner.start();
      try {
        filesToProcess = await findSupportedFiles(baseDir, {
          ...options,
          excludePatterns, // Pass the processed patterns
        });
      } finally {
        // Stop the spinner even if the scan throws
        scanSpinner.stop(true);
      }
      console.log(chalk.green(`Found ${filesToProcess.length} potential files in directory.`));
    }

    const fileDiscoveryTime = ((Date.now() - startTime) / 1000).toFixed(2);
    console.log(chalk.gray(`File discovery took ${fileDiscoveryTime} seconds.`));

    if (filesToProcess.length === 0) {
      console.log(chalk.yellow('No files to process. Exiting.'));
      return;
    }

    // Numeric options arrive as strings (see the command definition); reject non-numeric input
    // instead of silently handing NaN to the embeddings system
    const concurrency = Number.parseInt(options.concurrency || '10', 10);
    if (Number.isNaN(concurrency)) {
      throw new Error(`Invalid --concurrency value: "${options.concurrency}" (expected an integer)`);
    }
    const maxLines = Number.parseInt(options.maxLines || '1000', 10);
    if (Number.isNaN(maxLines)) {
      throw new Error(`Invalid --max-lines value: "${options.maxLines}" (expected an integer)`);
    }

    console.log(chalk.cyan(`Starting embedding generation for ${filesToProcess.length} files with concurrency: ${concurrency}`));

    // Per-status counters fed by the onProgress callback
    const progressCounts = { processed: 0, skipped: 0, failed: 0, excluded: 0 };

    const spinner = new Spinner('%s Processing files...');
    spinner.setSpinnerString('⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏');

    // Refresh the spinner title with the current progress figures
    const updateSpinner = () => {
      const { processed, skipped, failed, excluded } = progressCounts;
      const totalProcessed = processed + skipped + failed + excluded;
      const pct = Math.floor((totalProcessed / filesToProcess.length) * 100);
      spinner.setSpinnerTitle(
        `%s Embedding progress: ${pct}% (${totalProcessed}/${filesToProcess.length}) ` +
          `[${chalk.green(`✓ ${processed}`)} | ` +
          `${chalk.yellow(`↷ ${skipped + excluded}`)} | ` +
          `${chalk.red(`✗ ${failed}`)}]`
      );
    };

    spinner.start();
    const progressInterval = setInterval(updateSpinner, 100);

    let results;
    try {
      results = await embeddingsSystem.processBatchEmbeddings(filesToProcess, {
        concurrency,
        verbose: options.verbose,
        excludePatterns,
        respectGitignore,
        baseDir,
        batchSize: 100,
        maxLines,
        onProgress: (status) => {
          // Statuses other than the four tracked ones only trigger a redraw
          if (Object.prototype.hasOwnProperty.call(progressCounts, status)) {
            progressCounts[status] += 1;
          }
          updateSpinner();
        },
      });
    } finally {
      // Always tear down the progress display, including when generation fails
      clearInterval(progressInterval);
      spinner.stop(true);
    }

    console.log(chalk.green(`\nEmbedding generation complete!`));
    console.log(chalk.cyan(`Processed: ${results.processed} files`));
    console.log(chalk.yellow(`Skipped: ${results.skipped} files (binary, too large, etc.)`));
    console.log(chalk.yellow(`Excluded: ${results.excluded} files (gitignore, patterns)`));

    if (results.failed > 0) {
      console.log(chalk.red(`Failed: ${results.failed} files`));
    }

    // Generate project analysis after embeddings are complete
    console.log(chalk.cyan('\n🏗️ Generating project architecture analysis...'));
    const projectAnalyzer = new ProjectAnalyzer();
    try {
      const projectSummary = await projectAnalyzer.analyzeProject(projectDir, {
        verbose: options.verbose,
        forceAnalysis: options.forceAnalysis,
      });

      // Store project summary in embeddings system for later use
      await embeddingsSystem.storeProjectSummary(projectDir, projectSummary);

      console.log(chalk.green('✅ Project analysis complete and stored'));
      if (options.verbose) {
        console.log(chalk.gray(` Project: ${projectSummary.projectName}`));
        console.log(
          chalk.gray(
            ` Technologies: ${projectSummary.technologies.slice(0, 5).join(', ')}${projectSummary.technologies.length > 5 ? '...' : ''}`
          )
        );
        console.log(chalk.gray(` Key patterns: ${projectSummary.keyPatterns.length}`));
      }
    } catch (error) {
      // Deliberately non-fatal: the embeddings themselves were generated successfully
      console.error(chalk.red('⚠️ Project analysis failed but continuing:'), error.message);
    }

    // Clean up resources to allow the process to exit naturally
    console.log(chalk.cyan('Cleaning up resources...'));
    await embeddingsSystem.cleanup();
    console.log(chalk.green('Cleanup successful.'));
  } catch (err) {
    console.error(chalk.red('Error generating embeddings:'), err.message);
    console.error(err.stack);
    // Clean up resources even on error
    try {
      console.log(chalk.cyan('Cleaning up resources after error...'));
      await embeddingsSystem.cleanup();
      console.log(chalk.green('Cleanup successful.'));
    } catch (cleanupErr) {
      console.error(chalk.red('Error during cleanup:'), cleanupErr.message);
    }
    process.exit(1);
  }
}
661
+
662
/**
 * Clear the embeddings stored for one project.
 *
 * The project directory is `options.directory` (resolved to an absolute path)
 * when provided, otherwise the current working directory. After clearing, only
 * the database manager is cleaned up. On any failure the error and its stack are
 * logged, the same cleanup is attempted, and the process exits with code 1.
 *
 * @param {object} options - CLI options
 * @param {string} [options.directory] - Project directory to clear
 * @returns {Promise<void>}
 */
async function clearEmbeddings(options) {
  try {
    // Pick the directory that identifies the project whose embeddings are removed.
    let projectDir;
    if (options.directory) {
      projectDir = path.resolve(options.directory);
    } else {
      projectDir = process.cwd();
    }
    console.log(chalk.cyan(`Clearing embeddings for project: ${projectDir}`));

    await embeddingsSystem.clearEmbeddings(projectDir);
    console.log(chalk.green('Project embeddings have been cleared.'));

    // Only the database connection is released here; the full system cleanup is not invoked.
    console.log(chalk.cyan('Cleaning up resources...'));
    await embeddingsSystem.databaseManager.cleanup();
  } catch (err) {
    console.error(chalk.red('Error clearing embeddings:'), err.message);
    console.error(err.stack);

    // Best-effort release of the database connection before exiting.
    try {
      await embeddingsSystem.databaseManager.cleanup();
    } catch (cleanupErr) {
      console.error(chalk.red('Error during cleanup:'), cleanupErr.message);
    }
    process.exit(1);
  }
}
693
+
694
/**
 * Print statistics about stored embeddings.
 *
 * The lookup always uses a concrete directory: `options.directory` (resolved)
 * when given, otherwise `process.cwd()`. Note that when no directory is given
 * the banner says "all projects" even though the lookup is made with the
 * current working directory.
 *
 * NOTE(review): this command does not call any cleanup on `embeddingsSystem`
 * (the cleanup calls were disabled in the original) - confirm this is intentional.
 *
 * @param {object} options - CLI options
 * @param {string} [options.directory] - Project directory to report on
 * @returns {Promise<void>}
 */
async function showEmbeddingStats(options) {
  try {
    let projectDir;
    if (options.directory) {
      projectDir = path.resolve(options.directory);
      console.log(chalk.cyan(`Fetching embedding statistics for project: ${projectDir}`));
    } else {
      projectDir = process.cwd();
      console.log(chalk.cyan('Fetching embedding statistics for all projects...'));
    }

    const stats = await embeddingsSystem.getProjectEmbeddings(projectDir).getStats();

    console.log(chalk.bold.blue('\nEmbedding Statistics:'));

    // Treat a missing, empty or zero-count stats object as "nothing stored".
    const nothingStored = !stats || Object.keys(stats).length === 0 || stats.totalCount === 0;
    if (nothingStored) {
      console.log(chalk.yellow('No embeddings found or database is empty.'));
      return;
    }

    console.log(` ${chalk.cyan('Total Embeddings:')} ${chalk.green(stats.totalCount)}`);
    if (stats.dimensions) {
      console.log(` ${chalk.cyan('Vector Dimensions:')} ${chalk.green(stats.dimensions)}`);
    }
    if (stats.tables) {
      console.log(` ${chalk.cyan('Tables/Collections:')}`);
      Object.entries(stats.tables).forEach(([table, count]) => {
        console.log(` - ${chalk.cyan(table)}: ${chalk.green(count)} entries`);
      });
    }
    if (stats.lastUpdated) {
      const when = new Date(stats.lastUpdated).toLocaleString();
      console.log(` ${chalk.cyan('Last Updated:')} ${chalk.green(when)}`);
    }
  } catch (err) {
    console.error(chalk.red('Error fetching embedding statistics:'), err.message);
    console.error(err.stack);
    process.exit(1);
  }
}
748
+
749
+ // --- Helper Functions --- //
750
+
751
/**
 * Find candidate files in a directory using glob patterns.
 *
 * Uses `options.filePattern` as the single pattern when provided, otherwise a
 * built-in list of common source/config extensions. A built-in exclusion list
 * (dependency, build and hidden directories, lock/log files, asset folders) is
 * always applied, extended by `options.excludePatterns` when given.
 *
 * Gitignore rules are NOT applied here; only the explicit exclude patterns are
 * passed to glob. The search is aborted after two minutes.
 *
 * @param {string} directory - Directory to search
 * @param {object} [options] - Options from the generateEmbeddings command
 * @param {boolean} [options.verbose] - Log patterns, options and result counts
 * @param {string} [options.filePattern] - Single glob pattern replacing the defaults
 * @param {Array<string>} [options.excludePatterns] - Extra ignore patterns
 * @returns {Promise<Array<string>>} Absolute file paths; empty array on any error
 */
async function findSupportedFiles(directory, options = {}) {
  const verbose = options.verbose || false;
  const baseDir = path.resolve(directory);

  // Default patterns match common code files - adjust as needed
  const defaultPatterns = [
    '**/*.js',
    '**/*.jsx',
    '**/*.ts',
    '**/*.tsx',
    '**/*.py',
    '**/*.rb',
    '**/*.java',
    '**/*.go',
    '**/*.php',
    '**/*.cs',
    '**/*.c',
    '**/*.cpp',
    '**/*.h',
    '**/*.hpp',
    '**/*.html',
    '**/*.css',
    '**/*.scss',
    '**/*.json',
    '**/*.md',
    '**/*.yml',
    '**/*.yaml',
    '**/*.kt',
    '**/*.sh',
    '**/*.gradle',
  ];

  // A user-provided pattern replaces (does not extend) the defaults.
  const patternsToUse = options.filePattern ? [options.filePattern] : defaultPatterns;

  // Standard exclusions: dependency/build/hidden directories and files rarely useful for analysis.
  const defaultExcludes = [
    '**/node_modules/**',
    '**/dist/**',
    '**/build/**',
    '**/.*/**',
    '**/*.min.*',
    '**/vendor/**',
    '**/tmp/**',
    '**/coverage/**',
    '**/__pycache__/**',
    '**/*.log',
    '**/*.lock',
    '**/*.bak',
    '**/package-lock.json',
    '**/yarn.lock',
    '**/assets/**',
    '**/images/**',
    '**/fonts/**',
  ];

  const excludePatterns = options.excludePatterns ? [...defaultExcludes, ...options.excludePatterns] : defaultExcludes;

  // glob's own gitignore support is deliberately not used; only explicit excludes are
  // passed. Gitignore filtering is expected to happen later in the processing pipeline.
  const globOptions = {
    cwd: baseDir,
    ignore: [...excludePatterns],
    absolute: true, // Get absolute paths
    nodir: true, // Exclude directories
    dot: false, // Exclude dotfiles/dotdirectories unless explicitly included
    follow: false, // Don't follow symlinks to avoid potential loops/issues
    stat: true,
    withFileTypes: false,
    signal: AbortSignal.timeout(120000), // Abort after 2 minutes to prevent infinite hangs
  };

  if (verbose) {
    console.log(chalk.cyan('Using async glob to find files...'));
    console.log(chalk.gray(` Patterns: ${patternsToUse.join(', ')}`));
    console.log(chalk.gray(` Options:`), globOptions);
  }

  try {
    const files = await glob.glob(patternsToUse, globOptions);

    if (verbose) {
      console.log(chalk.green(`Glob found ${files.length} potential files.`));
    }

    // No additional filtering is applied: the glob results (already limited by
    // `nodir` and the explicit ignore list) are returned as-is.
    const finalFiles = files;

    if (verbose) {
      console.log(chalk.green(`Finished filtering glob results. ${finalFiles.length} files remain.`));
    }
    return finalFiles;
  } catch (err) {
    // AbortSignal.timeout() aborts with a DOMException named 'TimeoutError';
    // 'AbortError' is kept for aborts raised through a plain AbortController.
    if (err.name === 'TimeoutError' || err.name === 'AbortError') {
      console.error(chalk.red('Glob operation timed out. The directory might be too large or complex.'));
    } else {
      console.error(chalk.red(`Error during glob file search: ${err.message}`));
    }
    console.error(err.stack); // Log stack for debugging
    return []; // Return empty array on error
  }
}
888
+
889
/**
 * Turn a list of file paths and/or glob patterns into concrete file paths.
 *
 * Each entry is first tried as a literal path relative to `baseDir`; if that is
 * an existing regular file it is used directly. Otherwise the entry is expanded
 * as a glob with `baseDir` as the working directory. Duplicates are removed and
 * insertion order is kept.
 *
 * @param {Array<string>} patterns - File paths or glob patterns to expand
 * @param {string} [baseDir] - Base directory for resolving relative entries (defaults to cwd)
 * @returns {Promise<Array<string>>} Absolute file paths; empty array if anything throws
 */
async function expandFilePatterns(patterns, baseDir = process.cwd()) {
  // True only for paths that exist and are regular files.
  const isExistingFile = (candidate) => fs.existsSync(candidate) && fs.statSync(candidate).isFile();

  try {
    const collected = new Set();

    for (const pattern of patterns) {
      const literalPath = path.resolve(baseDir, pattern);

      if (isExistingFile(literalPath)) {
        collected.add(literalPath);
        continue;
      }

      // Not a literal file: glob the original pattern relative to baseDir.
      const matches = await glob.glob(pattern, { cwd: baseDir, absolute: true, nodir: true });
      for (const match of matches) {
        if (isExistingFile(match)) {
          collected.add(match);
        }
      }
    }

    return [...collected];
  } catch (err) {
    console.error(chalk.red('Error expanding file patterns:'), err.message);
    return [];
  }
}
924
+
925
/**
 * List the files a branch changes relative to the repository's base branch.
 *
 * Runs `git diff --name-only --diff-filter=d <base>...<branch>` from the git
 * root, so deleted files are left out of the list. The returned paths are
 * resolved against the git root (absolute); they are not checked for existence.
 *
 * @param {string} branch - Branch to analyze (the feature/target branch)
 * @param {string} [workingDir] - Directory to run git commands in (defaults to cwd)
 * @returns {Array<string>} Absolute paths of changed files; empty array on any error
 */
function getChangedFiles(branch, workingDir = process.cwd()) {
  try {
    const gitRoot = execSync('git rev-parse --show-toplevel', { cwd: workingDir }).toString().trim();
    console.log(chalk.gray(`Git repository: ${gitRoot}`));

    // The target branch must be available locally; a failure here aborts the lookup.
    ensureBranchExists(branch, workingDir);

    const baseBranch = findBaseBranch(workingDir);

    // The base branch is ensured on a best-effort basis: on failure we warn and
    // still attempt the diff with the same name.
    try {
      ensureBranchExists(baseBranch, workingDir);
    } catch (error) {
      console.warn(chalk.yellow(`Warning: Could not ensure base branch '${baseBranch}' exists locally: ${error.message}`));
    }

    console.log(chalk.gray(`Comparing ${branch} against ${baseBranch}...`));

    // Three-dot range; --diff-filter=d drops deleted files from the output.
    const diffArgs = ['--name-only', '--diff-filter=d', `${baseBranch}...${branch}`];
    const rawOutput = execGitSafe('git diff', diffArgs, { cwd: gitRoot }).toString();

    const changedFiles = [];
    for (const line of rawOutput.split('\n')) {
      if (line) {
        changedFiles.push(path.resolve(gitRoot, line));
      }
    }

    if (changedFiles.length > 0) {
      console.log(chalk.gray(`Found ${changedFiles.length} changed files in ${branch} vs ${baseBranch}`));
    }

    return changedFiles;
  } catch (err) {
    console.error(chalk.red('Error getting git diff:'), err.message);
    return [];
  }
}
976
+
977
+ // REMOVED: getFileDiff function - Diffing handled within LLM or specific review modes if needed.
978
+ // REMOVED: checkBranchExists function - Moved to utils.js
979
+
980
+ // --- Output Formatting Functions --- //
981
+ // These need to be adapted to the structure returned by cag-review.js functions
982
+
983
/**
 * Emit the review results as pretty-printed JSON.
 *
 * The document has a `summary` section (counts) and a `details` array with one
 * entry per reviewed file. It is written to `options.outputFile` when set,
 * otherwise to stdout (without a trailing newline).
 *
 * @param {Array<Object>} reviewResults - Individual file review results
 * @param {Object} [options] - Command line options
 * @param {string} [options.outputFile] - Path to write the JSON to instead of stdout
 */
function outputJson(reviewResults, options) {
  // Tally everything in one pass over the results.
  let filesWithIssues = 0;
  let totalIssues = 0;
  let issuesWithCodeSuggestions = 0;
  let skippedFiles = 0;
  let errorFiles = 0;

  for (const r of reviewResults) {
    const reviewed = r.success && !r.skipped;
    if (reviewed && r.results?.issues?.length > 0) {
      filesWithIssues += 1;
    }
    // The total counts issues on every entry, regardless of success/skipped.
    totalIssues += r.results?.issues?.length || 0;
    if (reviewed && r.results?.issues) {
      issuesWithCodeSuggestions += r.results.issues.filter((issue) => issue.codeSuggestion).length;
    }
    if (r.skipped) {
      skippedFiles += 1;
    }
    if (!r.success) {
      errorFiles += 1;
    }
  }

  // Per-file entry: failures carry the error, skips are flagged, the rest carry the review.
  const toDetail = (r) => {
    if (!r.success) {
      return { filePath: r.filePath, success: false, error: r.error };
    }
    if (r.skipped) {
      return { filePath: r.filePath, success: true, skipped: true };
    }
    return {
      filePath: r.filePath,
      success: true,
      language: r.language,
      review: r.results,
    };
  };

  const output = {
    summary: {
      totalFilesReviewed: reviewResults.length,
      filesWithIssues,
      totalIssues,
      issuesWithCodeSuggestions,
      skippedFiles,
      errorFiles,
    },
    details: reviewResults.map(toDetail),
  };

  const jsonOutput = JSON.stringify(output, null, 2);

  const wantsFile = options && options.outputFile;
  if (!wantsFile) {
    process.stdout.write(jsonOutput);
    return;
  }

  fs.writeFileSync(options.outputFile, jsonOutput, 'utf8');
  console.log(chalk.green(`JSON output saved to: ${options.outputFile}`));
}
1033
+
1034
/**
 * Print the review results to stdout as Markdown.
 *
 * Writes a summary section followed by one section per file. Issues whose
 * `severity` is missing are reported as LOW (the same default the emoji helper
 * uses) instead of crashing, `lineNumbers` that is not an array is tolerated,
 * and a code suggestion is only rendered when it actually carries `newCode`.
 *
 * @param {Array<Object>} reviewResults - Array of individual file review results
 */
function outputMarkdown(reviewResults) {
  console.log('# AI Code Review Results (RAG Approach)\n');

  const totalFiles = reviewResults.length;
  const filesWithIssues = reviewResults.filter((r) => r.success && !r.skipped && r.results?.issues?.length > 0).length;
  const totalIssues = reviewResults.reduce((sum, r) => sum + (r.results?.issues?.length || 0), 0);
  const skippedFiles = reviewResults.filter((r) => r.skipped).length;
  const errorFiles = reviewResults.filter((r) => !r.success).length;

  console.log('## Summary\n');
  console.log(`- **Files Analyzed:** ${totalFiles}`);
  console.log(`- **Files with Issues:** ${filesWithIssues}`);
  console.log(`- **Total Issues Found:** ${totalIssues}`);
  if (skippedFiles > 0) console.log(`- **Files Skipped:** ${skippedFiles}`);
  if (errorFiles > 0) console.log(`- **Errors:** ${errorFiles}`);
  console.log('\n');

  console.log('## Detailed Review per File\n');

  reviewResults.forEach((fileResult) => {
    console.log(`### ${fileResult.filePath}\n`);
    if (!fileResult.success) {
      console.log(`**Error:** ${fileResult.error}\n`);
      return;
    }
    if (fileResult.skipped) {
      console.log(`*Skipped (based on exclusion patterns or file type).*\n`);
      return;
    }
    if (!fileResult.results || (!fileResult.results.issues?.length && !fileResult.results.positives?.length)) {
      console.log(`*No significant findings or issues reported.*\n`);
      if (fileResult.results?.summary) {
        console.log(`**Summary:** ${fileResult.results.summary}\n`);
      }
      return;
    }

    const review = fileResult.results;
    if (review.summary) {
      console.log(`**Summary:** ${review.summary}\n`);
    }

    if (review.issues && review.issues.length > 0) {
      console.log(`**Issues Found (${review.issues.length}):**\n`);
      review.issues.forEach((issue) => {
        // Review data comes from an LLM, so individual fields may be absent or mistyped.
        const severity = String(issue.severity ?? 'low');
        const severityEmoji = getSeverityEmoji(severity);
        const lineInfo = Array.isArray(issue.lineNumbers) ? issue.lineNumbers.join(', ') : String(issue.lineNumbers ?? '');
        console.log(`- **[${severity.toUpperCase()}] ${severityEmoji} (Lines: ${lineInfo || 'N/A'})**: ${issue.description}`);
        if (issue.suggestion) {
          console.log(`\n *Suggestion:* ${issue.suggestion}\n`);
        }
        // Render the suggestion block only when replacement code is present; an empty
        // block would read as "delete these lines".
        if (issue.codeSuggestion && typeof issue.codeSuggestion.newCode === 'string') {
          const { startLine, endLine, newCode } = issue.codeSuggestion;
          const lineRange = endLine ? `${startLine}-${endLine}` : `${startLine}`;
          console.log(`\n **Suggested change (lines ${lineRange}):**\n`);
          console.log(' ```suggestion');
          console.log(
            newCode
              .split('\n')
              .map((line) => ` ${line}`)
              .join('\n')
          );
          console.log(' ```\n');
        }
      });
    }

    if (review.positives && review.positives.length > 0) {
      console.log(`**Positives Found (${review.positives.length}):**\n`);
      review.positives.forEach((positive) => {
        console.log(` - ${positive}\n`);
      });
    }
  });
}
1119
+
1120
/**
 * Print the review results to stdout as colored plain text.
 *
 * Writes a summary banner followed by one section per file. Skipped files and
 * files without findings are only mentioned in verbose mode. Missing issue
 * fields (severity, line numbers, old/new code of a suggestion) are tolerated
 * rather than throwing, and `cliOptions` may be omitted.
 *
 * @param {Array<Object>} reviewResults - Array of individual file review results
 * @param {Object} [cliOptions] - Command line options
 * @param {boolean} [cliOptions.verbose] - Also list skipped files and files without findings
 */
function outputText(reviewResults, cliOptions) {
  const verbose = Boolean(cliOptions?.verbose);

  const totalFiles = reviewResults.length;
  const filesWithIssues = reviewResults.filter((r) => r.success && !r.skipped && r.results?.issues?.length > 0).length;
  const totalIssues = reviewResults.reduce((sum, r) => sum + (r.results?.issues?.length || 0), 0);
  const skippedFiles = reviewResults.filter((r) => r.skipped).length;
  const errorFiles = reviewResults.filter((r) => !r.success).length;

  console.log(chalk.bold.blue('\n===== AI Code Review Summary ====='));
  console.log(`Files Analyzed: ${chalk.bold(totalFiles)}`);
  console.log(`Files with Issues: ${chalk.bold(filesWithIssues)}`);
  console.log(`Total Issues Found: ${chalk.bold(totalIssues)}`);
  if (skippedFiles > 0) console.log(`Files Skipped: ${chalk.yellow(skippedFiles)}`);
  if (errorFiles > 0) console.log(`Errors: ${chalk.red(errorFiles)}`);
  console.log(chalk.bold.blue('================================================'));

  reviewResults.forEach((fileResult) => {
    if (!fileResult.success) {
      console.log(chalk.bold.red(`\n===== Error reviewing ${fileResult.filePath} =====`));
      console.log(chalk.red(fileResult.error));
      console.log(chalk.bold.red('================================================'));
      return;
    }
    if (fileResult.skipped) {
      if (verbose) {
        console.log(chalk.yellow(`\nSkipped: ${fileResult.filePath}`));
      }
      return;
    }
    if (!fileResult.results || (!fileResult.results.issues?.length && !fileResult.results.positives?.length)) {
      if (verbose) {
        console.log(chalk.green(`\nNo findings for: ${fileResult.filePath}`));
        if (fileResult.results?.summary) {
          console.log(chalk.green(` Summary: ${fileResult.results.summary}`));
        }
      }
      return;
    }

    console.log(chalk.bold.underline(`\n===== Review for ${fileResult.filePath} =====`));
    const review = fileResult.results;

    if (review.summary) {
      console.log(chalk.bold.cyan(`Summary: ${review.summary}`));
    }

    if (review.issues && review.issues.length > 0) {
      console.log(chalk.bold.yellow('\nIssues:'));
      review.issues.forEach((issue) => {
        // Review data comes from an LLM, so individual fields may be absent or mistyped.
        const severity = String(issue.severity ?? 'low');
        const severityColor = getSeverityColor(severity);
        const lineInfo = Array.isArray(issue.lineNumbers) ? issue.lineNumbers.join(', ') : String(issue.lineNumbers ?? '');
        console.log(` ${severityColor(`[${severity.toUpperCase()}]`)} (Lines: ${lineInfo || 'N/A'})`);
        console.log(` ${issue.description}`);
        if (issue.suggestion) {
          console.log(` ${chalk.green(`Suggestion: ${issue.suggestion}`)}`);
        }
        if (issue.codeSuggestion) {
          const { startLine, endLine, oldCode, newCode } = issue.codeSuggestion;
          const lineRange = endLine ? `${startLine}-${endLine}` : `${startLine}`;
          console.log(` ${chalk.cyan(`Code Suggestion (lines ${lineRange}):`)}`);
          // Either side of the suggestion may be missing; print only what is there.
          if (typeof oldCode === 'string') {
            console.log(chalk.gray(' Old:'));
            oldCode.split('\n').forEach((line) => console.log(chalk.gray(` ${line}`)));
          }
          if (typeof newCode === 'string') {
            console.log(chalk.green(' New:'));
            newCode.split('\n').forEach((line) => console.log(chalk.green(` ${line}`)));
          }
        }
        console.log(''); // Add spacing
      });
    }

    if (review.positives && review.positives.length > 0) {
      console.log(chalk.bold.green('\nPositives:'));
      review.positives.forEach((positive) => {
        console.log(` - ${positive}`);
      });
      console.log('');
    }
    console.log(chalk.gray(`========================================${'='.repeat(fileResult.filePath.length)}`));
  });
}
1204
+
1205
+ // --- Severity Helpers (Remain Unchanged) --- //
1206
+
1207
/**
 * Get the chalk color function for a severity level.
 *
 * Matching is case-insensitive. `undefined` and `null` are treated as 'low';
 * any other non-string value is stringified, so the function never throws on
 * malformed severity values. Unknown levels fall back to plain blue.
 *
 * @param {string} [severity='low'] - Severity level
 * @returns {Function} Chalk color function
 */
function getSeverityColor(severity = 'low') {
  // The default parameter only covers `undefined`; normalize `null` and non-strings too.
  const level = String(severity ?? 'low').toLowerCase();
  switch (level) {
    case 'critical':
      return chalk.bold.red;
    case 'high':
      return chalk.red;
    case 'medium':
      return chalk.bold.yellow;
    case 'low':
      return chalk.yellow;
    case 'info':
      return chalk.bold.blue;
    default:
      return chalk.blue;
  }
}
1230
+
1231
/**
 * Get the emoji for a severity level (used by the markdown output).
 *
 * Matching is case-insensitive. `undefined` and `null` are treated as 'low';
 * any other non-string value is stringified, so the function never throws on
 * malformed severity values. Unknown levels fall back to a bullet.
 *
 * @param {string} [severity='low'] - Severity level
 * @returns {string} Emoji representing severity
 */
function getSeverityEmoji(severity = 'low') {
  // The default parameter only covers `undefined`; normalize `null` and non-strings too.
  const level = String(severity ?? 'low').toLowerCase();
  switch (level) {
    case 'critical':
      return '🚨';
    case 'high':
      return '🔥';
    case 'medium':
      return '⚠️';
    case 'low':
      return '💡';
    case 'info':
      return 'ℹ️';
    default:
      return '•';
  }
}
1254
+
1255
+ // ============================================================================
1256
+ // PR HISTORY ANALYSIS FUNCTIONS
1257
+ // ============================================================================
1258
+
1259
/**
 * Run the PR comment history analysis for a repository and print the results.
 *
 * Resolves the repository and project path from the CLI options, validates the
 * GitHub token, runs the analyzer and reports the elapsed time. On failure the
 * error is logged (with stack in verbose mode) and the process exits with code 1.
 *
 * @param {Object} options - CLI options
 * @returns {Promise<void>}
 */
async function analyzePRHistory(options) {
  const startTime = Date.now();
  // Seconds since the command started, formatted with two decimals.
  const elapsedSeconds = () => ((Date.now() - startTime) / 1000).toFixed(2);

  try {
    console.log(chalk.bold.blue('AI Code Review - PR History Analysis'));

    const { repository, projectPath } = getRepositoryAndProjectPath(options);
    console.log(chalk.cyan(`Project directory: ${projectPath}`));

    const token = validateGitHubToken(options);

    const analyzerConfig = {
      concurrency: options.concurrency || 2,
      batchSize: options.batchSize || 50,
      skipDependabot: true,
      includeDrafts: false,
    };
    const analyzer = new PRHistoryAnalyzer(analyzerConfig);
    analyzer.initialize(token);

    const { since, until, limit, resume, clear } = options;
    const analysisOptions = {
      since,
      until,
      limit,
      resume,
      clearExisting: clear,
      projectPath,
      onProgress: (progress) => displayProgress(progress, options.verbose),
    };

    console.log(chalk.blue(`Starting analysis for ${repository}...`));

    const results = await analyzer.analyzeRepository(repository, analysisOptions);

    displayAnalysisResults(results, elapsedSeconds());
    console.log(chalk.bold.green(`\nPR history analysis complete for ${repository}!`));
  } catch (error) {
    console.error(chalk.red(`\nError during PR history analysis (${elapsedSeconds()}s):`), error.message);
    if (options.verbose) {
      console.error(error.stack);
    }
    process.exit(1);
  }
}
1318
+
1319
/**
 * Show the PR history analysis status for a repository.
 *
 * Prints the analyzer's progress status, then the stored-comment statistics
 * when comments exist for the repository (or a "no data" view otherwise).
 * Exits the process with code 1 on error.
 *
 * @param {Object} options - CLI options
 * @returns {Promise<void>}
 */
async function getPRHistoryStatus(options) {
  try {
    console.log(chalk.bold.blue('AI Code Review - PR History Status'));

    const { repository, projectPath } = getRepositoryAndProjectPath(options);
    console.log(chalk.cyan(`Project directory: ${projectPath}`));

    // Progress status as tracked by the analyzer.
    const status = await new PRHistoryAnalyzer().getProgressStatus(repository);
    displayStatus(status);

    // Statistics are only fetched when the database holds comments for this repository.
    const hasComments = await hasPRComments(repository, projectPath);
    const stats = hasComments ? await getPRCommentsStats(repository, projectPath) : null;
    displayDatabaseStats(stats, hasComments);
  } catch (error) {
    console.error(chalk.red('Error getting PR history status:'), error.message);
    process.exit(1);
  }
}
1352
+
1353
/**
 * Delete the stored PR analysis data for a repository.
 *
 * Shows what would be removed and, unless `options.force` is set, asks for an
 * interactive confirmation ("y" or "yes", case-insensitive) before deleting.
 * Returns early when there is nothing to clear or the user declines. Exits the
 * process with code 1 on error.
 *
 * @param {Object} options - CLI options
 * @param {boolean} [options.force] - Skip the confirmation prompt
 * @param {boolean} [options.verbose] - Print the stack trace on error
 * @returns {Promise<void>}
 */
async function clearPRHistory(options) {
  // Ask a single question on stdin/stdout and resolve with the raw answer.
  const ask = async (question) => {
    const rl = readline.createInterface({
      input: process.stdin,
      output: process.stdout,
    });
    const reply = await new Promise((resolve) => {
      rl.question(question, resolve);
    });
    rl.close();
    return reply;
  };

  try {
    console.log(chalk.bold.blue('AI Code Review - Clear PR History Data'));

    const { repository, projectPath } = getRepositoryAndProjectPath(options);
    console.log(chalk.cyan(`Project directory: ${projectPath}`));
    console.log(chalk.cyan(`Repository: ${repository}`));

    // Nothing stored means nothing to confirm or delete.
    const hasComments = await hasPRComments(repository, projectPath);
    if (!hasComments) {
      console.log(chalk.yellow(`No PR analysis data found for ${repository}`));
      return;
    }

    // Show the user what is about to be removed.
    const stats = await getPRCommentsStats(repository, projectPath);
    const summaryLines = [
      '\nData to be cleared:',
      ` - ${stats.totalComments} comments`,
      ` - ${stats.totalPRs} pull requests`,
      ` - ${stats.uniqueAuthors} unique authors`,
      ` - Date range: ${stats.dateRange.earliest} to ${stats.dateRange.latest}`,
    ];
    for (const line of summaryLines) {
      console.log(chalk.yellow(line));
    }

    if (!options.force) {
      const answer = await ask(chalk.red('\nThis will permanently delete all PR analysis data. Continue? (y/N): '));
      const confirmed = ['y', 'yes'].includes(answer.toLowerCase());
      if (!confirmed) {
        console.log(chalk.cyan('Operation cancelled.'));
        return;
      }
    }

    console.log(chalk.blue('Clearing PR analysis data...'));

    const cleared = await clearPRComments(repository, projectPath);
    if (cleared) {
      console.log(chalk.bold.green(`\nPR analysis data cleared successfully for ${repository}`));
    } else {
      console.log(chalk.yellow('No data was found to clear.'));
    }
  } catch (error) {
    console.error(chalk.red('Error clearing PR history data:'), error.message);
    if (options.verbose) {
      console.error(error.stack);
    }
    process.exit(1);
  }
}