codecritique 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +1145 -0
- package/package.json +98 -0
- package/src/content-retrieval.js +747 -0
- package/src/custom-documents.js +597 -0
- package/src/embeddings/cache-manager.js +364 -0
- package/src/embeddings/constants.js +40 -0
- package/src/embeddings/database.js +921 -0
- package/src/embeddings/errors.js +208 -0
- package/src/embeddings/factory.js +447 -0
- package/src/embeddings/file-processor.js +851 -0
- package/src/embeddings/model-manager.js +337 -0
- package/src/embeddings/similarity-calculator.js +97 -0
- package/src/embeddings/types.js +113 -0
- package/src/feedback-loader.js +384 -0
- package/src/index.js +1418 -0
- package/src/llm.js +123 -0
- package/src/pr-history/analyzer.js +579 -0
- package/src/pr-history/bot-detector.js +123 -0
- package/src/pr-history/cli-utils.js +204 -0
- package/src/pr-history/comment-processor.js +549 -0
- package/src/pr-history/database.js +819 -0
- package/src/pr-history/github-client.js +629 -0
- package/src/project-analyzer.js +955 -0
- package/src/rag-analyzer.js +2764 -0
- package/src/rag-review.js +566 -0
- package/src/technology-keywords.json +753 -0
- package/src/utils/command.js +48 -0
- package/src/utils/constants.js +263 -0
- package/src/utils/context-inference.js +364 -0
- package/src/utils/document-detection.js +105 -0
- package/src/utils/file-validation.js +271 -0
- package/src/utils/git.js +232 -0
- package/src/utils/language-detection.js +170 -0
- package/src/utils/logging.js +24 -0
- package/src/utils/markdown.js +132 -0
- package/src/utils/mobilebert-tokenizer.js +141 -0
- package/src/utils/pr-chunking.js +276 -0
- package/src/utils/string-utils.js +28 -0
- package/src/zero-shot-classifier-open.js +392 -0
|
@@ -0,0 +1,2764 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RAG Analyzer Module
|
|
3
|
+
*
|
|
4
|
+
* This module provides functionality for analyzing code using context
|
|
5
|
+
* extracted by the Retrieval Augmented Generation (RAG) approach for code review.
|
|
6
|
+
* It identifies patterns, best practices, and generates review comments.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import fs from 'node:fs';
|
|
10
|
+
import path from 'node:path';
|
|
11
|
+
import chalk from 'chalk';
|
|
12
|
+
import { getDefaultEmbeddingsSystem } from './embeddings/factory.js';
|
|
13
|
+
import { calculateCosineSimilarity } from './embeddings/similarity-calculator.js';
|
|
14
|
+
import {
|
|
15
|
+
loadFeedbackData,
|
|
16
|
+
shouldSkipSimilarIssue,
|
|
17
|
+
extractDismissedPatterns,
|
|
18
|
+
generateFeedbackContext,
|
|
19
|
+
initializeSemanticSimilarity,
|
|
20
|
+
isSemanticSimilarityAvailable,
|
|
21
|
+
} from './feedback-loader.js';
|
|
22
|
+
import * as llm from './llm.js';
|
|
23
|
+
import { findRelevantPRComments } from './pr-history/database.js';
|
|
24
|
+
import { inferContextFromCodeContent, inferContextFromDocumentContent } from './utils/context-inference.js';
|
|
25
|
+
import { isGenericDocument, getGenericDocumentContext } from './utils/document-detection.js';
|
|
26
|
+
import { isTestFile, shouldProcessFile } from './utils/file-validation.js';
|
|
27
|
+
import { detectFileType, detectLanguageFromExtension } from './utils/language-detection.js';
|
|
28
|
+
import { debug } from './utils/logging.js';
|
|
29
|
+
|
|
30
|
+
// Constants for content processing
|
|
31
|
+
const MAX_QUERY_CONTEXT_LENGTH = 1500;
|
|
32
|
+
const MAX_EMBEDDING_CONTENT_LENGTH = 10000;
|
|
33
|
+
const DEFAULT_TRUNCATE_LINES = 300;
|
|
34
|
+
const GUIDELINE_TRUNCATE_LINES = 400;
|
|
35
|
+
const MAX_PR_COMMENTS_FOR_CONTEXT = 15;
|
|
36
|
+
|
|
37
|
+
// Create embeddings system instance
|
|
38
|
+
const embeddingsSystem = getDefaultEmbeddingsSystem();
|
|
39
|
+
|
|
40
|
+
// Track if semantic similarity has been initialized
|
|
41
|
+
let semanticSimilarityInitialized = false;
|
|
42
|
+
|
|
43
|
+
/**
|
|
44
|
+
* Initialize semantic similarity for feedback filtering
|
|
45
|
+
* Uses the shared embeddings system from feedback-loader.js
|
|
46
|
+
*/
|
|
47
|
+
async function ensureSemanticSimilarityInitialized() {
|
|
48
|
+
if (semanticSimilarityInitialized) {
|
|
49
|
+
return;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
try {
|
|
53
|
+
// Initialize semantic similarity using the shared embeddings system
|
|
54
|
+
await initializeSemanticSimilarity();
|
|
55
|
+
semanticSimilarityInitialized = true;
|
|
56
|
+
} catch (error) {
|
|
57
|
+
console.log(chalk.yellow(`⚠️ Could not initialize semantic similarity: ${error.message}`));
|
|
58
|
+
// Continue without semantic similarity - word-based fallback will be used
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
// ============================================================================
|
|
63
|
+
// COMMON PROMPT INSTRUCTIONS
|
|
64
|
+
// ============================================================================
|
|
65
|
+
|
|
66
|
+
/**
|
|
67
|
+
* Generate the common critical rules block for all prompts
|
|
68
|
+
* @param {Object} options - Options for customization
|
|
69
|
+
* @param {string} options.importRuleContext - Context-specific text for import rule ('code', 'test', or 'pr')
|
|
70
|
+
* @returns {string} Critical rules block
|
|
71
|
+
*/
|
|
72
|
+
function getCriticalRulesBlock(options = {}) {
|
|
73
|
+
const { importRuleContext = 'code' } = options;
|
|
74
|
+
|
|
75
|
+
// Customize import rule based on context
|
|
76
|
+
let importRuleText;
|
|
77
|
+
switch (importRuleContext) {
|
|
78
|
+
case 'test':
|
|
79
|
+
importRuleText =
|
|
80
|
+
'DO NOT flag missing imports or files referenced in import statements as issues. Focus only on test quality, logic, and patterns within the provided test files.';
|
|
81
|
+
break;
|
|
82
|
+
case 'pr':
|
|
83
|
+
importRuleText =
|
|
84
|
+
'DO NOT flag missing imports or files referenced in import statements as issues. In PR analysis, some files (especially assets like images, fonts, or excluded files) may not be included in the review scope. Focus only on code quality, logic, and patterns within the provided PR files.';
|
|
85
|
+
break;
|
|
86
|
+
default:
|
|
87
|
+
importRuleText =
|
|
88
|
+
'DO NOT flag missing imports or files referenced in import statements as issues. Focus only on code quality, logic, and patterns within the provided files.';
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
return `**🚨 CRITICAL: LINE NUMBER REPORTING RULE - READ CAREFULLY 🚨**
|
|
92
|
+
When reporting issues in the JSON output, NEVER provide exhaustive lists of line numbers. For repeated issues, list only 3-5 representative line numbers maximum. Exhaustive line number lists are considered errors and must be avoided.
|
|
93
|
+
|
|
94
|
+
**🚨 CRITICAL: IMPORT STATEMENT RULE - READ CAREFULLY 🚨**
|
|
95
|
+
${importRuleText}
|
|
96
|
+
|
|
97
|
+
**🚨 CRITICAL: NO LOW SEVERITY ISSUES - READ CAREFULLY 🚨**
|
|
98
|
+
DO NOT report "low" severity issues. Low severity issues typically include:
|
|
99
|
+
- Import statement ordering or grouping
|
|
100
|
+
- Code formatting and whitespace
|
|
101
|
+
- Minor stylistic preferences
|
|
102
|
+
- Comment placement or formatting
|
|
103
|
+
- Line length or wrapping suggestions
|
|
104
|
+
These concerns are handled by project linters (ESLint, Prettier, etc.) and should NOT be included in your review.
|
|
105
|
+
Only report issues with severity: "critical", "high", or "medium".
|
|
106
|
+
|
|
107
|
+
**🚨 CRITICAL: ACTIONABLE CODE ISSUES ONLY - NO VERIFICATION REQUESTS 🚨**
|
|
108
|
+
Your review must contain ONLY issues where you have identified a DEFINITE problem and can provide a SPECIFIC code fix.
|
|
109
|
+
|
|
110
|
+
**AUTOMATIC REJECTION - If your suggestion contains ANY of these phrases, DO NOT include it:**
|
|
111
|
+
- "Verify that..." / "Verify the..." / "Verify if..."
|
|
112
|
+
- "Ensure that..." / "Ensure the..."
|
|
113
|
+
- "Confirm that..." / "Confirm the..."
|
|
114
|
+
- "Validate that..." / "Validate the..."
|
|
115
|
+
- "Check that..." / "Check if..." / "Check whether..."
|
|
116
|
+
- "Add a comment explaining..." / "Add documentation..."
|
|
117
|
+
- "Review the documentation..." / "Reference the migration guide..."
|
|
118
|
+
- "Consider whether..." / "Consider if..."
|
|
119
|
+
- "This could potentially..." / "This might..." / "This may..."
|
|
120
|
+
- "If this is intentional..." / "If this change is to fix..."
|
|
121
|
+
- "...should be validated" / "...should be verified"
|
|
122
|
+
- "...but there's no validation..." / "...but there's no verification..."
|
|
123
|
+
|
|
124
|
+
**THE RULE**: If you cannot point to a SPECIFIC BUG or SPECIFIC VIOLATION and provide EXACT CODE to fix it, do not report it.
|
|
125
|
+
|
|
126
|
+
**GOOD issue**: "The function returns null on line 42 but the return type doesn't allow null. Fix: Change return type to \`string | null\`"
|
|
127
|
+
**BAD issue**: "Verify that the function handles null correctly" (This asks for verification, not a code fix)
|
|
128
|
+
**BAD issue**: "The type cast may bypass type safety" (This expresses uncertainty - "may" - without identifying a definite problem)
|
|
129
|
+
**BAD issue**: "Add a comment explaining why this type was changed" (This requests documentation, not a code fix)
|
|
130
|
+
|
|
131
|
+
When in doubt, leave it out. Only report issues you are CERTAIN about.`;
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
/**
|
|
135
|
+
* Generate the common citation requirement block
|
|
136
|
+
* @returns {string} Citation requirement block
|
|
137
|
+
*/
|
|
138
|
+
function getCitationRequirementBlock() {
|
|
139
|
+
return `**🚨 CRITICAL CITATION REQUIREMENT 🚨**
|
|
140
|
+
When you identify issues that violate custom instructions provided at the beginning of this prompt, you MUST:
|
|
141
|
+
- Include the source document name in your issue description (e.g., "violates the coding standards specified in '[Document Name]'")
|
|
142
|
+
- Reference the source document in your suggestion (e.g., "as required by '[Document Name]'" or "according to '[Document Name]'")
|
|
143
|
+
- Do NOT provide generic suggestions - always tie violations back to the specific custom instruction source`;
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
/**
|
|
147
|
+
* Generate the common code suggestions format block
|
|
148
|
+
* @returns {string} Code suggestions format block
|
|
149
|
+
*/
|
|
150
|
+
function getCodeSuggestionsFormatBlock() {
|
|
151
|
+
return `**🚨 CODE SUGGESTIONS FORMAT 🚨**
|
|
152
|
+
When suggesting code changes, you can optionally include a codeSuggestion object with:
|
|
153
|
+
- startLine: The starting line number of the code to replace
|
|
154
|
+
- endLine: (optional) The ending line number if replacing multiple lines
|
|
155
|
+
- oldCode: The exact current code that should be replaced (must match exactly)
|
|
156
|
+
- newCode: The proposed replacement code
|
|
157
|
+
|
|
158
|
+
Code suggestions enable reviewers to apply fixes directly as GitHub suggestions. Only provide code suggestions when:
|
|
159
|
+
1. The fix is concrete and can be applied automatically
|
|
160
|
+
2. You have the exact current code from the file content
|
|
161
|
+
3. The suggestion is a direct code replacement (not architectural changes)`;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
/**
|
|
165
|
+
* Generate the final reminder block for custom instructions
|
|
166
|
+
* @returns {string} Final reminder block
|
|
167
|
+
*/
|
|
168
|
+
function getFinalReminderBlock() {
|
|
169
|
+
return `**FINAL REMINDER: If custom instructions were provided at the start of this prompt, they MUST be followed and take precedence over all other guidelines.**`;
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
/**
|
|
173
|
+
* Format custom docs section for prompts
|
|
174
|
+
* @param {Array} customDocs - Array of custom document chunks
|
|
175
|
+
* @returns {string} Formatted custom docs section
|
|
176
|
+
*/
|
|
177
|
+
function formatCustomDocsSection(customDocs) {
|
|
178
|
+
if (!customDocs || customDocs.length === 0) {
|
|
179
|
+
return '';
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
let section = `
|
|
183
|
+
|
|
184
|
+
CRITICAL: CUSTOM INSTRUCTIONS - FOLLOW THESE BEFORE ALL OTHER INSTRUCTIONS
|
|
185
|
+
=====================================================================
|
|
186
|
+
|
|
187
|
+
`;
|
|
188
|
+
|
|
189
|
+
// Group chunks by document title to provide better context
|
|
190
|
+
const chunksByDocument = new Map();
|
|
191
|
+
customDocs.forEach((doc) => {
|
|
192
|
+
const title = doc.document_title || doc.title;
|
|
193
|
+
if (!chunksByDocument.has(title)) {
|
|
194
|
+
chunksByDocument.set(title, []);
|
|
195
|
+
}
|
|
196
|
+
chunksByDocument.get(title).push(doc);
|
|
197
|
+
});
|
|
198
|
+
|
|
199
|
+
chunksByDocument.forEach((chunks, docTitle) => {
|
|
200
|
+
section += `
|
|
201
|
+
### AUTHORITATIVE CUSTOM INSTRUCTION: "${docTitle}"
|
|
202
|
+
|
|
203
|
+
IMPORTANT: This is an authoritative document that defines mandatory review standards for this project.
|
|
204
|
+
When you find violations of these standards, you MUST cite "${docTitle}" as the source in your response.
|
|
205
|
+
|
|
206
|
+
`;
|
|
207
|
+
chunks.forEach((chunk, index) => {
|
|
208
|
+
section += `
|
|
209
|
+
**Section ${index + 1}${chunk.chunk_index !== undefined ? ` (Chunk ${chunk.chunk_index + 1})` : ''}:**
|
|
210
|
+
|
|
211
|
+
${chunk.content}
|
|
212
|
+
|
|
213
|
+
`;
|
|
214
|
+
});
|
|
215
|
+
section += `
|
|
216
|
+
---
|
|
217
|
+
|
|
218
|
+
`;
|
|
219
|
+
});
|
|
220
|
+
|
|
221
|
+
section += `
|
|
222
|
+
=====================================================================
|
|
223
|
+
END OF CUSTOM INSTRUCTIONS - These are authoritative project guidelines that take precedence over all other standards
|
|
224
|
+
`;
|
|
225
|
+
|
|
226
|
+
return section;
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
/**
|
|
230
|
+
* Build role definition with custom instructions references
|
|
231
|
+
* @param {string} baseRole - Base role description
|
|
232
|
+
* @param {Array} customDocs - Array of custom document chunks
|
|
233
|
+
* @param {string} reviewType - Type of review ('code', 'test', or 'pr')
|
|
234
|
+
* @returns {string} Complete role definition
|
|
235
|
+
*/
|
|
236
|
+
function buildRoleDefinition(baseRole, customDocs, reviewType = 'code') {
|
|
237
|
+
let roleDefinition = baseRole;
|
|
238
|
+
|
|
239
|
+
if (customDocs && customDocs.length > 0) {
|
|
240
|
+
const docTitles = [...new Set(customDocs.map((doc) => doc.document_title || doc.title))];
|
|
241
|
+
const reviewTypeText = reviewType === 'test' ? 'test reviews' : reviewType === 'pr' ? 'PR reviews' : 'review';
|
|
242
|
+
|
|
243
|
+
roleDefinition += `\n\nIMPORTANT: You have been given specific custom instructions that define how you should conduct your ${reviewTypeText}:`;
|
|
244
|
+
docTitles.forEach((title, index) => {
|
|
245
|
+
roleDefinition += `\n\n**CUSTOM INSTRUCTION SOURCE ${index + 1}: "${title}"**`;
|
|
246
|
+
roleDefinition += `\nThis contains specific instructions for your ${reviewType === 'test' ? 'test review' : 'review'} approach and criteria.`;
|
|
247
|
+
});
|
|
248
|
+
roleDefinition +=
|
|
249
|
+
'\n\nThese custom instructions define your review methodology and must be followed throughout your analysis. When you apply these instructions, reference the source document that informed your decision.';
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
return roleDefinition;
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
/**
|
|
256
|
+
* Format code examples for prompts
|
|
257
|
+
* @param {Array} codeExamples - Array of code examples
|
|
258
|
+
* @param {string} labelPrefix - Label prefix (e.g., 'CODE EXAMPLE', 'TEST EXAMPLE')
|
|
259
|
+
* @returns {string} Formatted code examples
|
|
260
|
+
*/
|
|
261
|
+
function formatCodeExamplesBlock(codeExamples, labelPrefix = 'CODE EXAMPLE') {
|
|
262
|
+
if (!codeExamples || codeExamples.length === 0) {
|
|
263
|
+
return labelPrefix.includes('TEST') ? 'No relevant test examples found.' : 'No relevant code examples found.';
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
return codeExamples
|
|
267
|
+
.map((ex) => {
|
|
268
|
+
const langIdentifier = ex.language || '';
|
|
269
|
+
return `
|
|
270
|
+
${labelPrefix} ${ex.index} (Similarity: ${ex.similarity})
|
|
271
|
+
Path: ${ex.path}
|
|
272
|
+
Language: ${ex.language}
|
|
273
|
+
|
|
274
|
+
\`\`\`${langIdentifier}
|
|
275
|
+
${ex.content}
|
|
276
|
+
\`\`\`
|
|
277
|
+
`;
|
|
278
|
+
})
|
|
279
|
+
.join('\n');
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
/**
|
|
283
|
+
* Format guideline snippets for prompts
|
|
284
|
+
* @param {Array} guidelineSnippets - Array of guideline snippets
|
|
285
|
+
* @param {string} labelPrefix - Label prefix (e.g., 'GUIDELINE', 'TESTING GUIDELINE')
|
|
286
|
+
* @returns {string} Formatted guideline snippets
|
|
287
|
+
*/
|
|
288
|
+
function formatGuidelinesBlock(guidelineSnippets, labelPrefix = 'GUIDELINE') {
|
|
289
|
+
if (!guidelineSnippets || guidelineSnippets.length === 0) {
|
|
290
|
+
return labelPrefix.includes('TESTING') ? 'No specific testing guideline snippets found.' : 'No specific guideline snippets found.';
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
return guidelineSnippets
|
|
294
|
+
.map((ex) => {
|
|
295
|
+
const langIdentifier = ex.language || 'text';
|
|
296
|
+
let title = `${labelPrefix} ${ex.index} (Source: ${ex.path}, Similarity: ${ex.similarity})`;
|
|
297
|
+
if (ex.headingText) {
|
|
298
|
+
title += `, Heading: "${ex.headingText}"`;
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
return `
|
|
302
|
+
${title}
|
|
303
|
+
|
|
304
|
+
\`\`\`${langIdentifier}
|
|
305
|
+
${ex.content}
|
|
306
|
+
\`\`\`
|
|
307
|
+
`;
|
|
308
|
+
})
|
|
309
|
+
.join('\n');
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
// ============================================================================
|
|
313
|
+
// END COMMON PROMPT INSTRUCTIONS
|
|
314
|
+
// ============================================================================
|
|
315
|
+
|
|
316
|
+
/**
|
|
317
|
+
* Get project summary for the given project path
|
|
318
|
+
* @param {string} projectPath - Project path
|
|
319
|
+
* @returns {Promise<Object|null>} Project summary or null
|
|
320
|
+
*/
|
|
321
|
+
async function getProjectSummary(projectPath) {
|
|
322
|
+
const resolvedPath = path.resolve(projectPath);
|
|
323
|
+
|
|
324
|
+
try {
|
|
325
|
+
// Retrieve from database
|
|
326
|
+
const summary = await embeddingsSystem.getProjectSummary(resolvedPath);
|
|
327
|
+
|
|
328
|
+
if (summary) {
|
|
329
|
+
console.log(chalk.cyan(`📋 Retrieved project summary for: ${path.basename(resolvedPath)}`));
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
return summary;
|
|
333
|
+
} catch (error) {
|
|
334
|
+
console.error(chalk.red(`Error retrieving project summary: ${error.message}`));
|
|
335
|
+
return null;
|
|
336
|
+
}
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
/**
|
|
340
|
+
* Format project summary for LLM context
|
|
341
|
+
* @param {Object} summary - Project summary object
|
|
342
|
+
* @returns {string} Formatted context string
|
|
343
|
+
*/
|
|
344
|
+
function formatProjectSummaryForLLM(summary) {
|
|
345
|
+
if (!summary) return '';
|
|
346
|
+
|
|
347
|
+
let context = `\n## PROJECT ARCHITECTURE CONTEXT\n\n`;
|
|
348
|
+
|
|
349
|
+
context += `**Project:** ${summary.projectName || 'Unknown'} (${summary.projectType || 'Unknown'})\n`;
|
|
350
|
+
|
|
351
|
+
// Safe access to technologies array
|
|
352
|
+
if (summary.technologies && Array.isArray(summary.technologies) && summary.technologies.length > 0) {
|
|
353
|
+
context += `**Technologies:** ${summary.technologies.slice(0, 8).join(', ')}${summary.technologies.length > 8 ? '...' : ''}\n`;
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
// Safe access to mainFrameworks array
|
|
357
|
+
if (summary.mainFrameworks && Array.isArray(summary.mainFrameworks) && summary.mainFrameworks.length > 0) {
|
|
358
|
+
context += `**Main Frameworks:** ${summary.mainFrameworks.join(', ')}\n`;
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
context += '\n';
|
|
362
|
+
|
|
363
|
+
if (summary.customImplementations && Array.isArray(summary.customImplementations) && summary.customImplementations.length > 0) {
|
|
364
|
+
context += `**Custom Implementations to Recognize:**\n`;
|
|
365
|
+
summary.customImplementations.forEach((impl, i) => {
|
|
366
|
+
if (i < 5 && impl) {
|
|
367
|
+
// Limit to top 5 to avoid overwhelming the LLM
|
|
368
|
+
context += `- **${impl.name || 'Unknown'}**: ${impl.description || 'No description'}\n`;
|
|
369
|
+
if (impl.properties && Array.isArray(impl.properties) && impl.properties.length > 0) {
|
|
370
|
+
context += ` Properties: ${impl.properties.slice(0, 3).join(', ')}\n`;
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
});
|
|
374
|
+
context += '\n';
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
if (summary.apiPatterns && Array.isArray(summary.apiPatterns) && summary.apiPatterns.length > 0) {
|
|
378
|
+
context += `**API Patterns:**\n`;
|
|
379
|
+
summary.apiPatterns.forEach((pattern) => {
|
|
380
|
+
if (pattern) {
|
|
381
|
+
context += `- ${pattern.type || 'Unknown'}: ${pattern.description || 'No description'}\n`;
|
|
382
|
+
}
|
|
383
|
+
});
|
|
384
|
+
context += '\n';
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
if (summary.stateManagement && summary.stateManagement.approach && summary.stateManagement.approach !== 'Unknown') {
|
|
388
|
+
context += `**State Management:** ${summary.stateManagement.approach}\n`;
|
|
389
|
+
if (
|
|
390
|
+
summary.stateManagement.patterns &&
|
|
391
|
+
Array.isArray(summary.stateManagement.patterns) &&
|
|
392
|
+
summary.stateManagement.patterns.length > 0
|
|
393
|
+
) {
|
|
394
|
+
context += `- Patterns: ${summary.stateManagement.patterns.join(', ')}\n`;
|
|
395
|
+
}
|
|
396
|
+
context += '\n';
|
|
397
|
+
}
|
|
398
|
+
|
|
399
|
+
if (summary.reviewGuidelines && Array.isArray(summary.reviewGuidelines) && summary.reviewGuidelines.length > 0) {
|
|
400
|
+
context += `**Project-Specific Review Guidelines:**\n`;
|
|
401
|
+
summary.reviewGuidelines.slice(0, 6).forEach((guideline) => {
|
|
402
|
+
if (guideline) {
|
|
403
|
+
context += `- ${guideline}\n`;
|
|
404
|
+
}
|
|
405
|
+
});
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
return context;
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
// Helper function for truncating content with line count
|
|
412
|
+
function truncateContent(content, maxLines = DEFAULT_TRUNCATE_LINES) {
|
|
413
|
+
const lines = content.split('\n');
|
|
414
|
+
if (lines.length > maxLines) {
|
|
415
|
+
return {
|
|
416
|
+
content: lines.slice(0, maxLines).join('\n') + `\n... (truncated, ${lines.length - maxLines} more lines)`,
|
|
417
|
+
wasTruncated: true,
|
|
418
|
+
originalLineCount: lines.length,
|
|
419
|
+
};
|
|
420
|
+
}
|
|
421
|
+
return {
|
|
422
|
+
content: content,
|
|
423
|
+
wasTruncated: false,
|
|
424
|
+
originalLineCount: lines.length,
|
|
425
|
+
};
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
// Helper function for formatting context items (code examples or guidelines)
|
|
429
|
+
function formatContextItems(items, type = 'code') {
|
|
430
|
+
return items.map((item, idx) => {
|
|
431
|
+
// Format similarity score
|
|
432
|
+
const similarityFormatted = typeof item.similarity === 'number' ? item.similarity.toFixed(2) : 'N/A';
|
|
433
|
+
|
|
434
|
+
// Truncate content based on type
|
|
435
|
+
const maxLines = type === 'guideline' ? GUIDELINE_TRUNCATE_LINES : DEFAULT_TRUNCATE_LINES;
|
|
436
|
+
const truncated = truncateContent(item.content, maxLines);
|
|
437
|
+
|
|
438
|
+
const baseFormatted = {
|
|
439
|
+
index: idx + 1,
|
|
440
|
+
path: item.path,
|
|
441
|
+
similarity: similarityFormatted,
|
|
442
|
+
language: item.language || (type === 'guideline' ? 'text' : 'unknown'),
|
|
443
|
+
content: truncated.content,
|
|
444
|
+
};
|
|
445
|
+
|
|
446
|
+
// Add type-specific fields
|
|
447
|
+
if (type === 'guideline') {
|
|
448
|
+
baseFormatted.headingText = item.headingText || null;
|
|
449
|
+
baseFormatted.type = item.type || 'documentation';
|
|
450
|
+
}
|
|
451
|
+
|
|
452
|
+
return baseFormatted;
|
|
453
|
+
});
|
|
454
|
+
}
|
|
455
|
+
|
|
456
|
+
// --- Helper: createGuidelineQueryForLLMRetrieval ---
|
|
457
|
+
function createGuidelineQueryForLLMRetrieval(codeSnippet, reviewedSnippetContext, language) {
|
|
458
|
+
const codeContext = codeSnippet.substring(0, MAX_QUERY_CONTEXT_LENGTH); // Limit snippet length in query
|
|
459
|
+
let query = 'Retrieve technical documentation, architectural guidelines, and best practices. ';
|
|
460
|
+
|
|
461
|
+
if (
|
|
462
|
+
reviewedSnippetContext.area !== 'Unknown' &&
|
|
463
|
+
reviewedSnippetContext.area !== 'GeneralJS_TS' &&
|
|
464
|
+
reviewedSnippetContext.area !== 'General'
|
|
465
|
+
) {
|
|
466
|
+
query += `Specifically looking for ${reviewedSnippetContext.area} related information. `;
|
|
467
|
+
}
|
|
468
|
+
if (reviewedSnippetContext.dominantTech.length > 0) {
|
|
469
|
+
query += `Focus on technologies like: ${reviewedSnippetContext.dominantTech.join(', ')}. `;
|
|
470
|
+
}
|
|
471
|
+
const generalKeywords = reviewedSnippetContext.keywords.filter(
|
|
472
|
+
(kw) => !reviewedSnippetContext.dominantTech.map((t) => t.toLowerCase()).includes(kw.toLowerCase())
|
|
473
|
+
);
|
|
474
|
+
if (generalKeywords.length > 0) {
|
|
475
|
+
query += `Consider relevance to concepts such as: ${generalKeywords.slice(0, 3).join(', ')}. `;
|
|
476
|
+
}
|
|
477
|
+
query += `Relevant to the following ${language} code snippet context: \\n\`\`\`${language}\\n${codeContext}...\\n\`\`\``;
|
|
478
|
+
return query;
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
// --- Helper: createTestGuidelineQueryForLLMRetrieval ---
|
|
482
|
+
function createTestGuidelineQueryForLLMRetrieval(codeSnippet, reviewedSnippetContext, language) {
|
|
483
|
+
const codeContext = codeSnippet.substring(0, MAX_QUERY_CONTEXT_LENGTH); // Limit snippet length in query
|
|
484
|
+
let query = 'Retrieve testing documentation, test patterns, and testing best practices. ';
|
|
485
|
+
|
|
486
|
+
query += 'Focus on test coverage, test naming conventions, assertion patterns, mocking strategies, and test organization. ';
|
|
487
|
+
|
|
488
|
+
if (
|
|
489
|
+
reviewedSnippetContext.area !== 'Unknown' &&
|
|
490
|
+
reviewedSnippetContext.area !== 'GeneralJS_TS' &&
|
|
491
|
+
reviewedSnippetContext.area !== 'General'
|
|
492
|
+
) {
|
|
493
|
+
query += `Specifically looking for ${reviewedSnippetContext.area} testing patterns and practices. `;
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
if (reviewedSnippetContext.dominantTech.length > 0) {
|
|
497
|
+
query += `Focus on testing frameworks and patterns for: ${reviewedSnippetContext.dominantTech.join(', ')}. `;
|
|
498
|
+
}
|
|
499
|
+
|
|
500
|
+
const testingKeywords = [
|
|
501
|
+
'test',
|
|
502
|
+
'spec',
|
|
503
|
+
'mock',
|
|
504
|
+
'stub',
|
|
505
|
+
'assertion',
|
|
506
|
+
'coverage',
|
|
507
|
+
'fixture',
|
|
508
|
+
'beforeEach',
|
|
509
|
+
'afterEach',
|
|
510
|
+
'describe',
|
|
511
|
+
'it',
|
|
512
|
+
'expect',
|
|
513
|
+
];
|
|
514
|
+
const relevantKeywords = reviewedSnippetContext.keywords.filter((kw) => testingKeywords.some((tk) => kw.toLowerCase().includes(tk)));
|
|
515
|
+
|
|
516
|
+
if (relevantKeywords.length > 0) {
|
|
517
|
+
query += `Consider testing concepts such as: ${relevantKeywords.slice(0, 3).join(', ')}. `;
|
|
518
|
+
}
|
|
519
|
+
|
|
520
|
+
query += `Relevant to the following ${language} test file context: \\n\`\`\`${language}\\n${codeContext}...\\n\`\`\``;
|
|
521
|
+
return query;
|
|
522
|
+
}
|
|
523
|
+
|
|
524
|
+
/**
|
|
525
|
+
* Run an analysis using the RAG approach (single file or holistic PR)
|
|
526
|
+
*
|
|
527
|
+
* @param {string} filePath - Path to the file to analyze, or a special marker for PR reviews
|
|
528
|
+
* @param {Object} options - Analysis options
|
|
529
|
+
* @returns {Promise<Object>} Analysis results
|
|
530
|
+
*/
|
|
531
|
+
async function runAnalysis(filePath, options = {}) {
|
|
532
|
+
try {
|
|
533
|
+
// Check if this is a holistic PR review
|
|
534
|
+
if (options.isHolisticPRReview && filePath === 'PR_HOLISTIC_REVIEW') {
|
|
535
|
+
console.log(chalk.blue(`Performing holistic PR review for ${options.prFiles?.length || 0} files`));
|
|
536
|
+
return await performHolisticPRAnalysis(options);
|
|
537
|
+
}
|
|
538
|
+
|
|
539
|
+
console.log(chalk.blue(`Analyzing file: ${filePath}`));
|
|
540
|
+
|
|
541
|
+
// Load feedback data if feedback tracking is enabled
|
|
542
|
+
let feedbackData = {};
|
|
543
|
+
if (options.trackFeedback && options.feedbackPath) {
|
|
544
|
+
console.log(chalk.cyan('--- Loading Feedback Data ---'));
|
|
545
|
+
feedbackData = await loadFeedbackData(options.feedbackPath, { verbose: options.verbose });
|
|
546
|
+
}
|
|
547
|
+
|
|
548
|
+
// Check if file exists
|
|
549
|
+
if (!fs.existsSync(filePath)) {
|
|
550
|
+
throw new Error(`File not found: ${filePath}`);
|
|
551
|
+
}
|
|
552
|
+
|
|
553
|
+
// Read file content - use diff content if this is a diff-only review
|
|
554
|
+
let content;
|
|
555
|
+
let fullFileContent;
|
|
556
|
+
if (options.diffOnly && options.diffContent) {
|
|
557
|
+
content = options.diffContent;
|
|
558
|
+
// For PR reviews, always read the full file content for context awareness
|
|
559
|
+
fullFileContent = fs.existsSync(filePath) ? fs.readFileSync(filePath, 'utf8') : null;
|
|
560
|
+
console.log(chalk.blue(`Analyzing diff only for ${path.basename(filePath)}`));
|
|
561
|
+
} else {
|
|
562
|
+
content = fs.readFileSync(filePath, 'utf8');
|
|
563
|
+
fullFileContent = content;
|
|
564
|
+
console.log(chalk.blue(`Analyzing full file ${path.basename(filePath)}`));
|
|
565
|
+
}
|
|
566
|
+
|
|
567
|
+
// Check if file should be processed
|
|
568
|
+
if (!shouldProcessFile(filePath, content)) {
|
|
569
|
+
console.log(chalk.yellow(`Skipping file based on exclusion patterns: ${filePath}`));
|
|
570
|
+
return {
|
|
571
|
+
success: true,
|
|
572
|
+
skipped: true,
|
|
573
|
+
message: 'File skipped based on exclusion patterns',
|
|
574
|
+
};
|
|
575
|
+
}
|
|
576
|
+
|
|
577
|
+
// --- Stage 1: CONTEXT RETRIEVAL ---
|
|
578
|
+
console.log(chalk.blue('--- Stage 1: Context Retrieval ---'));
|
|
579
|
+
const {
|
|
580
|
+
language,
|
|
581
|
+
isTestFile,
|
|
582
|
+
finalCodeExamples,
|
|
583
|
+
finalGuidelineSnippets,
|
|
584
|
+
prCommentContext,
|
|
585
|
+
prContextAvailable,
|
|
586
|
+
relevantCustomDocChunks,
|
|
587
|
+
} = await getContextForFile(filePath, content, options);
|
|
588
|
+
|
|
589
|
+
// --- Stage 1.5: PROJECT ARCHITECTURE CONTEXT ---
|
|
590
|
+
console.log(chalk.blue('--- Stage 1.5: Retrieving Project Architecture Context ---'));
|
|
591
|
+
const projectPath = options.projectPath || process.cwd();
|
|
592
|
+
const projectSummary = await getProjectSummary(projectPath);
|
|
593
|
+
|
|
594
|
+
// --- Stage 2: PREPARE CONTEXT FOR LLM ---
|
|
595
|
+
console.log(chalk.blue('--- Stage 2: Preparing Context for LLM ---'));
|
|
596
|
+
|
|
597
|
+
// Format the lists that will be passed
|
|
598
|
+
const formattedCodeExamples = formatContextItems(finalCodeExamples, 'code');
|
|
599
|
+
const formattedGuidelines = formatContextItems(finalGuidelineSnippets, 'guideline');
|
|
600
|
+
|
|
601
|
+
// --- Log the context being sent to the LLM --- >
|
|
602
|
+
console.log(chalk.magenta('--- Guidelines Sent to LLM ---'));
|
|
603
|
+
if (formattedGuidelines.length > 0) {
|
|
604
|
+
formattedGuidelines.forEach((g, i) => {
|
|
605
|
+
console.log(chalk.magenta(` [${i + 1}] Path: ${g.path} ${g.headingText ? `(Heading: "${g.headingText}")` : ''}`));
|
|
606
|
+
console.log(chalk.gray(` Content: ${g.content.substring(0, 100).replace(/\\n/g, ' ')}...`));
|
|
607
|
+
});
|
|
608
|
+
} else {
|
|
609
|
+
console.log(chalk.magenta(' (None)'));
|
|
610
|
+
}
|
|
611
|
+
|
|
612
|
+
console.log(chalk.magenta('--- Code Examples Sent to LLM ---'));
|
|
613
|
+
if (finalCodeExamples.length > 0) {
|
|
614
|
+
finalCodeExamples.forEach((ex, i) => {
|
|
615
|
+
console.log(chalk.magenta(` [${i + 1}] Path: ${ex.path} (Similarity: ${ex.similarity?.toFixed(3) || 'N/A'})`));
|
|
616
|
+
console.log(chalk.gray(` Content: ${ex.content.substring(0, 100).replace(/\\n/g, ' ')}...`));
|
|
617
|
+
});
|
|
618
|
+
} else {
|
|
619
|
+
console.log(chalk.magenta(' (None)'));
|
|
620
|
+
}
|
|
621
|
+
|
|
622
|
+
console.log(chalk.magenta('--- Custom Document Chunks Sent to LLM ---'));
|
|
623
|
+
if (relevantCustomDocChunks && relevantCustomDocChunks.length > 0) {
|
|
624
|
+
relevantCustomDocChunks.forEach((chunk, i) => {
|
|
625
|
+
console.log(chalk.magenta(` [${i + 1}] Document: "${chunk.document_title}" (Chunk ${chunk.chunk_index + 1})`));
|
|
626
|
+
console.log(chalk.magenta(` Similarity: ${chunk.similarity?.toFixed(3) || 'N/A'}`));
|
|
627
|
+
console.log(chalk.gray(` Content: ${chunk.content.substring(0, 100).replace(/\\n/g, ' ')}...`));
|
|
628
|
+
});
|
|
629
|
+
} else {
|
|
630
|
+
console.log(chalk.magenta(' (None)'));
|
|
631
|
+
}
|
|
632
|
+
console.log(chalk.magenta('---------------------------------'));
|
|
633
|
+
// --- End Logging --->
|
|
634
|
+
|
|
635
|
+
// Prepare context for LLM with the potentially reduced lists
|
|
636
|
+
const context = prepareContextForLLM(
|
|
637
|
+
filePath,
|
|
638
|
+
content,
|
|
639
|
+
language,
|
|
640
|
+
// Pass the formatted lists
|
|
641
|
+
formattedCodeExamples,
|
|
642
|
+
formattedGuidelines, // Always pass the formatted guidelines
|
|
643
|
+
prCommentContext, // Pass PR comment context
|
|
644
|
+
{ ...options, isTestFile, relevantCustomDocChunks, feedbackData, projectSummary, fullFileContent } // Pass full file content for context
|
|
645
|
+
);
|
|
646
|
+
|
|
647
|
+
// Call LLM for analysis
|
|
648
|
+
const analysisResults = await callLLMForAnalysis(context, { ...options, isTestFile, feedbackData });
|
|
649
|
+
|
|
650
|
+
// Filter out low severity issues (formatting/style concerns handled by linters)
|
|
651
|
+
// Note: The LLM prompt instructs not to generate low severity issues, but this filter
|
|
652
|
+
// serves as a safety net in case any slip through despite the prompt instructions
|
|
653
|
+
const lowSeverityFiltered = filterLowSeverityIssues(analysisResults, { verbose: options.verbose });
|
|
654
|
+
|
|
655
|
+
// Post-process results to filter dismissed issues
|
|
656
|
+
let filteredResults = lowSeverityFiltered;
|
|
657
|
+
if (options.trackFeedback && feedbackData && Object.keys(feedbackData).length > 0) {
|
|
658
|
+
console.log(chalk.cyan('--- Filtering Results Based on Feedback ---'));
|
|
659
|
+
filteredResults = await filterAnalysisResults(lowSeverityFiltered, feedbackData, {
|
|
660
|
+
similarityThreshold: options.feedbackThreshold || 0.7,
|
|
661
|
+
verbose: options.verbose,
|
|
662
|
+
});
|
|
663
|
+
}
|
|
664
|
+
|
|
665
|
+
return {
|
|
666
|
+
success: true,
|
|
667
|
+
filePath,
|
|
668
|
+
language,
|
|
669
|
+
results: filteredResults,
|
|
670
|
+
context: {
|
|
671
|
+
codeExamples: finalCodeExamples.length,
|
|
672
|
+
guidelines: finalGuidelineSnippets.length,
|
|
673
|
+
prComments: prCommentContext.length,
|
|
674
|
+
prContextAvailable,
|
|
675
|
+
},
|
|
676
|
+
prHistory: prContextAvailable
|
|
677
|
+
? {
|
|
678
|
+
commentsFound: prCommentContext.length,
|
|
679
|
+
patterns: extractCommentPatterns(prCommentContext),
|
|
680
|
+
summary: generateContextSummary(prCommentContext, extractCommentPatterns(prCommentContext)),
|
|
681
|
+
}
|
|
682
|
+
: null,
|
|
683
|
+
similarExamples: finalCodeExamples.map((ex) => ({
|
|
684
|
+
path: ex.path,
|
|
685
|
+
similarity: ex.similarity,
|
|
686
|
+
})),
|
|
687
|
+
metadata: {
|
|
688
|
+
analysisTimestamp: new Date().toISOString(),
|
|
689
|
+
featuresUsed: {
|
|
690
|
+
codeExamples: finalCodeExamples.length > 0,
|
|
691
|
+
guidelines: finalGuidelineSnippets.length > 0,
|
|
692
|
+
prHistory: prContextAvailable,
|
|
693
|
+
feedbackFiltering: options.trackFeedback && Object.keys(feedbackData).length > 0,
|
|
694
|
+
},
|
|
695
|
+
...(filteredResults.metadata || {}),
|
|
696
|
+
},
|
|
697
|
+
};
|
|
698
|
+
} catch (error) {
|
|
699
|
+
console.error(chalk.red(`Error analyzing file: ${error.message}`));
|
|
700
|
+
return {
|
|
701
|
+
success: false,
|
|
702
|
+
error: error.message,
|
|
703
|
+
filePath,
|
|
704
|
+
};
|
|
705
|
+
}
|
|
706
|
+
}
|
|
707
|
+
|
|
708
|
+
/**
|
|
709
|
+
* Prepare context for LLM analysis
|
|
710
|
+
*
|
|
711
|
+
* @param {string} filePath - Path to the file
|
|
712
|
+
* @param {string} content - File content
|
|
713
|
+
* @param {string} language - File language
|
|
714
|
+
* @param {Array<Object>} codeExamples - Processed list of code examples
|
|
715
|
+
* @param {Array<Object>} guidelineSnippets - Processed list of guideline snippets
|
|
716
|
+
* @param {Array<Object>} prCommentContext - PR comment context
|
|
717
|
+
* @param {Object} options - Options
|
|
718
|
+
* @returns {Object} Context for LLM
|
|
719
|
+
*/
|
|
720
|
+
function prepareContextForLLM(filePath, content, language, finalCodeExamples, finalGuidelineSnippets, prCommentContext = [], options = {}) {
|
|
721
|
+
const { customDocs, relevantCustomDocChunks, feedbackData, projectSummary } = options;
|
|
722
|
+
|
|
723
|
+
// Extract file name and directory
|
|
724
|
+
const fileName = path.basename(filePath);
|
|
725
|
+
const dirPath = path.dirname(filePath);
|
|
726
|
+
const dirName = path.basename(dirPath);
|
|
727
|
+
|
|
728
|
+
// Determine if this is a diff-only review
|
|
729
|
+
const isDiffReview = options.diffOnly && options.diffContent;
|
|
730
|
+
const reviewType = isDiffReview ? 'DIFF REVIEW' : 'FULL FILE REVIEW';
|
|
731
|
+
|
|
732
|
+
// For PR reviews, we need both the full file content and the diff
|
|
733
|
+
// content represents the diff (what to review)
|
|
734
|
+
// options.fullFileContent represents the complete file context
|
|
735
|
+
const fullFileContent = isDiffReview && options.fullFileContent ? options.fullFileContent : content;
|
|
736
|
+
|
|
737
|
+
// Format similar code examples and guideline snippets
|
|
738
|
+
const codeExamples = formatContextItems(finalCodeExamples, 'code');
|
|
739
|
+
const guidelineSnippets = formatContextItems(finalGuidelineSnippets, 'guideline');
|
|
740
|
+
|
|
741
|
+
const contextSections = [];
|
|
742
|
+
|
|
743
|
+
// Add existing context sections
|
|
744
|
+
if (codeExamples.length > 0) {
|
|
745
|
+
contextSections.push({
|
|
746
|
+
title: 'Similar Code Examples',
|
|
747
|
+
description: 'Code patterns from the project that are similar to the file being reviewed',
|
|
748
|
+
items: codeExamples,
|
|
749
|
+
});
|
|
750
|
+
}
|
|
751
|
+
|
|
752
|
+
if (guidelineSnippets.length > 0) {
|
|
753
|
+
contextSections.push({
|
|
754
|
+
title: 'Project Guidelines',
|
|
755
|
+
description: 'Documentation and guidelines relevant to this code',
|
|
756
|
+
items: guidelineSnippets,
|
|
757
|
+
});
|
|
758
|
+
}
|
|
759
|
+
|
|
760
|
+
// Add PR Comment Context Section
|
|
761
|
+
if (prCommentContext && prCommentContext.length > 0) {
|
|
762
|
+
contextSections.push({
|
|
763
|
+
title: 'Historical Review Comments',
|
|
764
|
+
description: 'Similar code patterns and issues identified by human reviewers in past PRs',
|
|
765
|
+
items: prCommentContext,
|
|
766
|
+
});
|
|
767
|
+
}
|
|
768
|
+
|
|
769
|
+
// Add feedback context if available
|
|
770
|
+
const dismissedPatterns = feedbackData ? extractDismissedPatterns(feedbackData, { maxPatterns: 10 }) : [];
|
|
771
|
+
if (dismissedPatterns.length > 0) {
|
|
772
|
+
contextSections.push({
|
|
773
|
+
title: 'Dismissed Issue Patterns',
|
|
774
|
+
description: 'Types of issues previously dismissed or marked as not relevant by users',
|
|
775
|
+
items: dismissedPatterns.map((pattern, index) => ({
|
|
776
|
+
index: index + 1,
|
|
777
|
+
issue: pattern.issue,
|
|
778
|
+
reason: pattern.reason,
|
|
779
|
+
sentiment: pattern.sentiment,
|
|
780
|
+
})),
|
|
781
|
+
});
|
|
782
|
+
}
|
|
783
|
+
|
|
784
|
+
return {
|
|
785
|
+
file: {
|
|
786
|
+
path: filePath,
|
|
787
|
+
name: fileName,
|
|
788
|
+
directory: dirPath,
|
|
789
|
+
directoryName: dirName,
|
|
790
|
+
language,
|
|
791
|
+
content,
|
|
792
|
+
fullFileContent, // Include full file content for context awareness
|
|
793
|
+
reviewType: reviewType,
|
|
794
|
+
isDiffReview: isDiffReview,
|
|
795
|
+
// Add PR context if available
|
|
796
|
+
...(options.prContext && {
|
|
797
|
+
prContext: {
|
|
798
|
+
totalFiles: options.prContext.totalFiles,
|
|
799
|
+
testFiles: options.prContext.testFiles,
|
|
800
|
+
sourceFiles: options.prContext.sourceFiles,
|
|
801
|
+
allFiles: options.prContext.allFiles,
|
|
802
|
+
},
|
|
803
|
+
}),
|
|
804
|
+
// Add diff-specific info if this is a diff review
|
|
805
|
+
...(isDiffReview &&
|
|
806
|
+
options.diffInfo && {
|
|
807
|
+
diffInfo: {
|
|
808
|
+
addedLines: options.diffInfo.addedLines.length,
|
|
809
|
+
removedLines: options.diffInfo.removedLines.length,
|
|
810
|
+
baseBranch: options.baseBranch,
|
|
811
|
+
targetBranch: options.targetBranch,
|
|
812
|
+
},
|
|
813
|
+
}),
|
|
814
|
+
},
|
|
815
|
+
context: contextSections,
|
|
816
|
+
codeExamples,
|
|
817
|
+
guidelineSnippets,
|
|
818
|
+
customDocs: relevantCustomDocChunks || customDocs, // Use relevant chunks if available, fallback to full docs
|
|
819
|
+
feedbackContext: generateFeedbackContext(dismissedPatterns), // Add feedback context for LLM
|
|
820
|
+
projectSummary: projectSummary, // Add project architecture summary
|
|
821
|
+
metadata: {
|
|
822
|
+
hasCodeExamples: finalCodeExamples.length > 0,
|
|
823
|
+
hasGuidelines: finalGuidelineSnippets.length > 0,
|
|
824
|
+
hasPRHistory: prCommentContext.length > 0,
|
|
825
|
+
hasFeedbackContext: dismissedPatterns.length > 0,
|
|
826
|
+
hasProjectSummary: !!projectSummary,
|
|
827
|
+
analysisTimestamp: new Date().toISOString(),
|
|
828
|
+
reviewType: reviewType,
|
|
829
|
+
isPRReview: options.isPRReview || false,
|
|
830
|
+
},
|
|
831
|
+
options,
|
|
832
|
+
};
|
|
833
|
+
}
|
|
834
|
+
|
|
835
|
+
/**
|
|
836
|
+
* Call LLM for code analysis
|
|
837
|
+
*
|
|
838
|
+
* @param {Object} context - Context for LLM
|
|
839
|
+
* @param {Object} options - Options
|
|
840
|
+
* @returns {Promise<Object>} Analysis results
|
|
841
|
+
*/
|
|
842
|
+
async function callLLMForAnalysis(context, options = {}) {
|
|
843
|
+
try {
|
|
844
|
+
let prompt;
|
|
845
|
+
const model = options.model || 'claude-sonnet-4-5';
|
|
846
|
+
const maxTokens = options.maxTokens || 8192; // Default to a safe limit
|
|
847
|
+
|
|
848
|
+
if (options.isHolisticPRReview) {
|
|
849
|
+
prompt = generateHolisticPRAnalysisPrompt(context);
|
|
850
|
+
} else {
|
|
851
|
+
prompt = options.isTestFile ? generateTestFileAnalysisPrompt(context) : generateAnalysisPrompt(context);
|
|
852
|
+
}
|
|
853
|
+
|
|
854
|
+
// Call LLM with the prompt
|
|
855
|
+
const llmResponse = await sendPromptToLLM(prompt, {
|
|
856
|
+
temperature: 0,
|
|
857
|
+
maxTokens: maxTokens,
|
|
858
|
+
model: model,
|
|
859
|
+
isJsonMode: true, // Standardize on using JSON mode if available
|
|
860
|
+
});
|
|
861
|
+
|
|
862
|
+
console.log(chalk.blue('Received LLM response, attempting to parse...'));
|
|
863
|
+
|
|
864
|
+
console.log(chalk.gray(`Response type: ${typeof llmResponse}`));
|
|
865
|
+
console.log(chalk.gray(`Response length: ${llmResponse?.length || 0} characters`));
|
|
866
|
+
|
|
867
|
+
// Parse the raw LLM response
|
|
868
|
+
const analysisResponse = parseAnalysisResponse(llmResponse);
|
|
869
|
+
|
|
870
|
+
// Validate the parsed response has the expected structure
|
|
871
|
+
if (!options.isHolisticPRReview && (!analysisResponse.summary || !Array.isArray(analysisResponse.issues))) {
|
|
872
|
+
console.warn(chalk.yellow('Parsed response missing expected structure, attempting to reconstruct...'));
|
|
873
|
+
|
|
874
|
+
return {
|
|
875
|
+
summary: analysisResponse.summary || 'Analysis completed with parsing issues',
|
|
876
|
+
issues: Array.isArray(analysisResponse.issues) ? analysisResponse.issues : [],
|
|
877
|
+
rawResponse: analysisResponse.rawResponse || llmResponse.substring(0, 500),
|
|
878
|
+
parseWarning: 'Response structure was reconstructed due to parsing issues',
|
|
879
|
+
};
|
|
880
|
+
}
|
|
881
|
+
|
|
882
|
+
console.log(chalk.green('Successfully parsed LLM response with expected structure'));
|
|
883
|
+
return analysisResponse;
|
|
884
|
+
} catch (error) {
|
|
885
|
+
console.error(chalk.red(`Error calling LLM for analysis: ${error.message}`));
|
|
886
|
+
console.error(error.stack);
|
|
887
|
+
throw error;
|
|
888
|
+
}
|
|
889
|
+
}
|
|
890
|
+
|
|
891
|
+
/**
|
|
892
|
+
* Appends critical JSON formatting requirements to a prompt.
|
|
893
|
+
* @param {string} promptBody - The main body of the prompt.
|
|
894
|
+
* @returns {string} The finalized prompt with JSON formatting instructions.
|
|
895
|
+
*/
|
|
896
|
+
function finalizePrompt(promptBody) {
|
|
897
|
+
return `${promptBody}
|
|
898
|
+
|
|
899
|
+
CRITICAL FORMATTING REQUIREMENTS:
|
|
900
|
+
- Respond ONLY with a valid JSON object
|
|
901
|
+
- Do not include any text before or after the JSON
|
|
902
|
+
- Do not wrap the JSON in markdown code blocks
|
|
903
|
+
- Ensure all strings are properly escaped
|
|
904
|
+
- Use double quotes for all string values
|
|
905
|
+
- Do not include trailing commas
|
|
906
|
+
- Validate that your response is parseable JSON before sending
|
|
907
|
+
|
|
908
|
+
MARKDOWN FORMATTING IN DESCRIPTIONS AND SUGGESTIONS:
|
|
909
|
+
- Use backticks (\`) around code elements like commands, flags, file names, variable names, function names, etc.
|
|
910
|
+
- Examples: \`git fetch\`, \`--unshallow\`, \`timeout-minutes\`, \`process.env.NODE_ENV\`, \`handleClick()\`
|
|
911
|
+
- Use backticks for any technical terms that would be considered "code" including:
|
|
912
|
+
- Command line tools and commands
|
|
913
|
+
- Command line flags and options
|
|
914
|
+
- Configuration keys and values
|
|
915
|
+
- File names and extensions
|
|
916
|
+
- Environment variables
|
|
917
|
+
- Function and variable names
|
|
918
|
+
- CSS classes and IDs
|
|
919
|
+
- HTML attributes
|
|
920
|
+
- API endpoints and parameters
|
|
921
|
+
- Do NOT use backticks around regular English words or common nouns
|
|
922
|
+
- Use proper markdown formatting for emphasis (*italics*, **bold**) when appropriate
|
|
923
|
+
|
|
924
|
+
Your response must start with { and end with } with no additional text.`;
|
|
925
|
+
}
|
|
926
|
+
|
|
927
|
+
// LLM call function
|
|
928
|
+
async function sendPromptToLLM(prompt, llmOptions) {
|
|
929
|
+
try {
|
|
930
|
+
if (!llm || typeof llm.sendPromptToClaude !== 'function') {
|
|
931
|
+
throw new Error('LLM module does not contain required function: sendPromptToClaude');
|
|
932
|
+
}
|
|
933
|
+
|
|
934
|
+
// Define schema for code review responses
|
|
935
|
+
const codeReviewSchema = {
|
|
936
|
+
type: 'object',
|
|
937
|
+
additionalProperties: false,
|
|
938
|
+
properties: {
|
|
939
|
+
summary: { type: 'string' },
|
|
940
|
+
issues: {
|
|
941
|
+
type: 'array',
|
|
942
|
+
items: {
|
|
943
|
+
type: 'object',
|
|
944
|
+
properties: {
|
|
945
|
+
type: { type: 'string' },
|
|
946
|
+
severity: { type: 'string' },
|
|
947
|
+
description: { type: 'string' },
|
|
948
|
+
lineNumbers: {
|
|
949
|
+
type: 'array',
|
|
950
|
+
items: { type: 'number' },
|
|
951
|
+
},
|
|
952
|
+
suggestion: { type: 'string' },
|
|
953
|
+
codeSuggestion: {
|
|
954
|
+
type: 'object',
|
|
955
|
+
properties: {
|
|
956
|
+
startLine: { type: 'number' },
|
|
957
|
+
endLine: { type: 'number' },
|
|
958
|
+
oldCode: { type: 'string' },
|
|
959
|
+
newCode: { type: 'string' },
|
|
960
|
+
},
|
|
961
|
+
required: ['startLine', 'oldCode', 'newCode'],
|
|
962
|
+
},
|
|
963
|
+
category: { type: 'string' },
|
|
964
|
+
},
|
|
965
|
+
required: ['type', 'severity', 'description', 'lineNumbers'],
|
|
966
|
+
},
|
|
967
|
+
},
|
|
968
|
+
crossFileIssues: {
|
|
969
|
+
type: 'array',
|
|
970
|
+
items: {
|
|
971
|
+
type: 'object',
|
|
972
|
+
properties: {
|
|
973
|
+
type: { type: 'string' },
|
|
974
|
+
severity: { type: 'string' },
|
|
975
|
+
message: { type: 'string' },
|
|
976
|
+
files: {
|
|
977
|
+
type: 'array',
|
|
978
|
+
items: { type: 'string' },
|
|
979
|
+
},
|
|
980
|
+
suggestion: { type: 'string' },
|
|
981
|
+
category: { type: 'string' },
|
|
982
|
+
},
|
|
983
|
+
required: ['type', 'severity', 'message', 'files'],
|
|
984
|
+
},
|
|
985
|
+
},
|
|
986
|
+
fileSpecificIssues: {
|
|
987
|
+
type: 'object',
|
|
988
|
+
additionalProperties: {
|
|
989
|
+
type: 'array',
|
|
990
|
+
items: {
|
|
991
|
+
type: 'object',
|
|
992
|
+
properties: {
|
|
993
|
+
type: { type: 'string' },
|
|
994
|
+
severity: { type: 'string' },
|
|
995
|
+
description: { type: 'string' },
|
|
996
|
+
lineNumbers: {
|
|
997
|
+
type: 'array',
|
|
998
|
+
items: { type: 'number' },
|
|
999
|
+
},
|
|
1000
|
+
suggestion: { type: 'string' },
|
|
1001
|
+
codeSuggestion: {
|
|
1002
|
+
type: 'object',
|
|
1003
|
+
properties: {
|
|
1004
|
+
startLine: { type: 'number' },
|
|
1005
|
+
endLine: { type: 'number' },
|
|
1006
|
+
oldCode: { type: 'string' },
|
|
1007
|
+
newCode: { type: 'string' },
|
|
1008
|
+
},
|
|
1009
|
+
required: ['startLine', 'oldCode', 'newCode'],
|
|
1010
|
+
},
|
|
1011
|
+
category: { type: 'string' },
|
|
1012
|
+
},
|
|
1013
|
+
required: ['type', 'severity', 'description', 'lineNumbers'],
|
|
1014
|
+
},
|
|
1015
|
+
},
|
|
1016
|
+
},
|
|
1017
|
+
recommendations: {
|
|
1018
|
+
type: 'array',
|
|
1019
|
+
items: {
|
|
1020
|
+
type: 'object',
|
|
1021
|
+
properties: {
|
|
1022
|
+
category: { type: 'string' },
|
|
1023
|
+
suggestion: { type: 'string' },
|
|
1024
|
+
priority: { type: 'string' },
|
|
1025
|
+
impact: { type: 'string' },
|
|
1026
|
+
},
|
|
1027
|
+
required: ['category', 'suggestion'],
|
|
1028
|
+
},
|
|
1029
|
+
},
|
|
1030
|
+
},
|
|
1031
|
+
required: ['summary'],
|
|
1032
|
+
};
|
|
1033
|
+
|
|
1034
|
+
const response = await llm.sendPromptToClaude(prompt, {
|
|
1035
|
+
...llmOptions,
|
|
1036
|
+
jsonSchema: codeReviewSchema,
|
|
1037
|
+
});
|
|
1038
|
+
|
|
1039
|
+
// Return the response object so parseAnalysisResponse can access the json property
|
|
1040
|
+
return response;
|
|
1041
|
+
} catch (error) {
|
|
1042
|
+
console.error(chalk.red(`Error in LLM call: ${error.message}`));
|
|
1043
|
+
throw error; // Re-throw to properly handle the error
|
|
1044
|
+
}
|
|
1045
|
+
}
|
|
1046
|
+
|
|
1047
|
+
/**
|
|
1048
|
+
* Generate analysis prompt for LLM
|
|
1049
|
+
*
|
|
1050
|
+
* @param {Object} context - Context for LLM
|
|
1051
|
+
* @returns {string} Analysis prompt
|
|
1052
|
+
*/
|
|
1053
|
+
function generateAnalysisPrompt(context) {
|
|
1054
|
+
const { file, codeExamples, guidelineSnippets, customDocs, feedbackContext } = context;
|
|
1055
|
+
|
|
1056
|
+
// Format code examples and guidelines using shared helpers
|
|
1057
|
+
const formattedCodeExamples = formatCodeExamplesBlock(codeExamples, 'CODE EXAMPLE');
|
|
1058
|
+
const formattedGuidelines = formatGuidelinesBlock(guidelineSnippets, 'GUIDELINE');
|
|
1059
|
+
|
|
1060
|
+
// Check for PR comment context in the context object
|
|
1061
|
+
const { context: contextSections } = context;
|
|
1062
|
+
let prHistorySection = '';
|
|
1063
|
+
|
|
1064
|
+
console.log(chalk.blue(`🔍 Checking for PR comments in prompt generation...`));
|
|
1065
|
+
console.log(chalk.gray(`Context sections available: ${contextSections ? contextSections.length : 0}`));
|
|
1066
|
+
|
|
1067
|
+
if (contextSections && contextSections.length > 0) {
|
|
1068
|
+
contextSections.forEach((section, idx) => {
|
|
1069
|
+
console.log(chalk.gray(` Section ${idx + 1}: ${section.title} (${section.items?.length || 0} items)`));
|
|
1070
|
+
});
|
|
1071
|
+
|
|
1072
|
+
const prComments = contextSections.find((section) => section.title === 'Historical Review Comments');
|
|
1073
|
+
if (prComments && prComments.items.length > 0) {
|
|
1074
|
+
console.log(chalk.green(`✅ Adding ${prComments.items.length} PR comments to LLM prompt`));
|
|
1075
|
+
prHistorySection += `
|
|
1076
|
+
|
|
1077
|
+
CONTEXT C: HISTORICAL REVIEW COMMENTS
|
|
1078
|
+
Similar code patterns and issues identified by human reviewers in past PRs
|
|
1079
|
+
|
|
1080
|
+
`;
|
|
1081
|
+
prComments.items.slice(0, MAX_PR_COMMENTS_FOR_CONTEXT).forEach((comment, idx) => {
|
|
1082
|
+
prHistorySection += `### Historical Comment ${idx + 1}\n`;
|
|
1083
|
+
prHistorySection += `- **PR**: #${comment.pr_number} by ${comment.author}\n`;
|
|
1084
|
+
prHistorySection += `- **File**: ${comment.file_path}\n`;
|
|
1085
|
+
prHistorySection += `- **Type**: ${comment.comment_type}\n`;
|
|
1086
|
+
prHistorySection += `- **Relevance**: ${(comment.similarity_score * 100).toFixed(1)}%\n`;
|
|
1087
|
+
prHistorySection += `- **Review**: ${comment.comment_text}\n\n`;
|
|
1088
|
+
});
|
|
1089
|
+
|
|
1090
|
+
prHistorySection += `Use these historical patterns to identify DEFINITE issues in the current code. `;
|
|
1091
|
+
prHistorySection += `Only report issues that EXACTLY match historical patterns with SPECIFIC code fixes.\n\n`;
|
|
1092
|
+
|
|
1093
|
+
console.log(chalk.blue(`PR History section preview: ${prHistorySection.substring(0, 200)}...`));
|
|
1094
|
+
} else {
|
|
1095
|
+
console.log(chalk.yellow(`❌ No PR comments section found in context`));
|
|
1096
|
+
}
|
|
1097
|
+
} else {
|
|
1098
|
+
console.log(chalk.yellow(`❌ No context sections available for PR comments`));
|
|
1099
|
+
}
|
|
1100
|
+
|
|
1101
|
+
// Detect if this is a diff review
|
|
1102
|
+
const isDiffReview = file.reviewType === 'DIFF REVIEW';
|
|
1103
|
+
const reviewInstructions = isDiffReview
|
|
1104
|
+
? 'Your task is to review the git diff by performing a two-stage analysis based **only** on the provided context, prioritizing documented guidelines and historical review patterns. Follow the context awareness instructions provided with the file content below.'
|
|
1105
|
+
: 'Your task is to review the following code file by performing a two-stage analysis based **only** on the provided context, prioritizing documented guidelines and historical review patterns.';
|
|
1106
|
+
|
|
1107
|
+
const fileSection = isDiffReview
|
|
1108
|
+
? `GIT DIFF TO REVIEW (FOCUS ONLY ON CHANGED LINES):
|
|
1109
|
+
Path: ${file.path}
|
|
1110
|
+
Language: ${file.language}
|
|
1111
|
+
Base Branch: ${file.diffInfo?.baseBranch || 'master'}
|
|
1112
|
+
Target Branch: ${file.diffInfo?.targetBranch || 'HEAD'}
|
|
1113
|
+
|
|
1114
|
+
**CRITICAL CONTEXT AWARENESS INSTRUCTIONS:**
|
|
1115
|
+
|
|
1116
|
+
You have access to TWO pieces of information:
|
|
1117
|
+
1. **FULL FILE CONTENT** - The complete file for understanding context
|
|
1118
|
+
2. **GIT DIFF** - Only the changes to review
|
|
1119
|
+
|
|
1120
|
+
**Review Rules:**
|
|
1121
|
+
- ONLY critique the CHANGED lines shown in the diff (lines with + or -)
|
|
1122
|
+
- USE the full file content to understand context and dependencies
|
|
1123
|
+
- DO NOT suggest adding code that already exists in the unchanged portions
|
|
1124
|
+
- DO NOT flag issues about missing code if it exists in the full file
|
|
1125
|
+
- Do NOT flag functions/variables as missing if they exist elsewhere in the full file
|
|
1126
|
+
- The unchanged code is part of the file - check it before making assumptions
|
|
1127
|
+
|
|
1128
|
+
**FULL FILE CONTENT (for context - DO NOT review unchanged code):**
|
|
1129
|
+
|
|
1130
|
+
\`\`\`${file.language}
|
|
1131
|
+
${file.fullFileContent || file.content}
|
|
1132
|
+
\`\`\`
|
|
1133
|
+
|
|
1134
|
+
**GIT DIFF TO REVIEW (critique ONLY these changes):**
|
|
1135
|
+
|
|
1136
|
+
\`\`\`diff
|
|
1137
|
+
${file.content}
|
|
1138
|
+
\`\`\``
|
|
1139
|
+
: `FILE TO REVIEW:
|
|
1140
|
+
Path: ${file.path}
|
|
1141
|
+
Language: ${file.language}
|
|
1142
|
+
|
|
1143
|
+
\`\`\`${file.language}
|
|
1144
|
+
${file.content}
|
|
1145
|
+
\`\`\``;
|
|
1146
|
+
|
|
1147
|
+
// Add project architecture context if available
|
|
1148
|
+
let projectArchitectureSection = '';
|
|
1149
|
+
if (context.projectSummary) {
|
|
1150
|
+
projectArchitectureSection = formatProjectSummaryForLLM(context.projectSummary);
|
|
1151
|
+
}
|
|
1152
|
+
|
|
1153
|
+
// Use shared helpers for custom docs and role definition
|
|
1154
|
+
const customDocsSection = formatCustomDocsSection(customDocs);
|
|
1155
|
+
const roleDefinition = buildRoleDefinition(
|
|
1156
|
+
'You are an expert code reviewer acting as a senior developer on this specific project.',
|
|
1157
|
+
customDocs,
|
|
1158
|
+
'code'
|
|
1159
|
+
);
|
|
1160
|
+
|
|
1161
|
+
// Corrected prompt with full two-stage analysis + combined output stage
|
|
1162
|
+
return finalizePrompt(`
|
|
1163
|
+
${roleDefinition}
|
|
1164
|
+
|
|
1165
|
+
${reviewInstructions}
|
|
1166
|
+
|
|
1167
|
+
${customDocsSection}
|
|
1168
|
+
|
|
1169
|
+
${fileSection}
|
|
1170
|
+
|
|
1171
|
+
CONTEXT FROM PROJECT:
|
|
1172
|
+
${projectArchitectureSection}
|
|
1173
|
+
|
|
1174
|
+
CONTEXT A: EXPLICIT GUIDELINES FROM DOCUMENTATION
|
|
1175
|
+
${formattedGuidelines}
|
|
1176
|
+
|
|
1177
|
+
CONTEXT B: SIMILAR CODE EXAMPLES FROM PROJECT
|
|
1178
|
+
${formattedCodeExamples}
|
|
1179
|
+
|
|
1180
|
+
${prHistorySection}
|
|
1181
|
+
|
|
1182
|
+
${feedbackContext || ''}
|
|
1183
|
+
|
|
1184
|
+
INSTRUCTIONS:
|
|
1185
|
+
|
|
1186
|
+
${getCriticalRulesBlock({ importRuleContext: 'code' })}
|
|
1187
|
+
|
|
1188
|
+
**Perform the following analysis stages sequentially:**
|
|
1189
|
+
|
|
1190
|
+
**STAGE 1: Custom Instructions & Guideline-Based Review**
|
|
1191
|
+
1. **FIRST AND MOST IMPORTANT**: If custom instructions were provided at the beginning of this prompt, analyze the 'FILE TO REVIEW' against those custom instructions BEFORE all other analysis. Custom instructions always take precedence.
|
|
1192
|
+
2. Analyze the 'FILE TO REVIEW' strictly against the standards, rules, and explanations provided in 'CONTEXT A: EXPLICIT GUIDELINES'.
|
|
1193
|
+
3. Identify any specific deviations where the reviewed code violates custom instructions OR explicit guidelines. **CRITICAL**: When you find violations of custom instructions, you MUST cite the specific custom instruction source document name in your issue description and suggestion.
|
|
1194
|
+
4. Temporarily ignore 'CONTEXT B: SIMILAR CODE EXAMPLES' during this stage.
|
|
1195
|
+
|
|
1196
|
+
**STAGE 2: Code Example-Based Review (CRITICAL FOR IMPLICIT PATTERNS)**
|
|
1197
|
+
1. **CRITICAL FIRST STEP**: Scan ALL code examples in Context B and create a mental list of:
|
|
1198
|
+
- Common import statements (especially those containing 'helper', 'util', 'shared', 'common', 'test')
|
|
1199
|
+
- Frequently used function calls that appear across multiple examples
|
|
1200
|
+
- Project-specific wrappers or utilities (e.g., \`renderWithTestHelpers\` instead of direct \`render\`)
|
|
1201
|
+
- Consistent patterns in how operations are performed
|
|
1202
|
+
2. **IMPORTANT**: For each common utility or pattern you identify, note:
|
|
1203
|
+
- Which files use it (cite specific examples)
|
|
1204
|
+
- What the pattern appears to do
|
|
1205
|
+
- Whether the reviewed file is using this pattern or not
|
|
1206
|
+
3. Analyze the 'FILE TO REVIEW' against these discovered patterns. Focus on:
|
|
1207
|
+
- Missing imports of commonly used utilities
|
|
1208
|
+
- Direct library usage where others use project wrappers
|
|
1209
|
+
- Deviations from established patterns
|
|
1210
|
+
4. **HIGH PRIORITY**: Flag any instances where:
|
|
1211
|
+
- The reviewed code uses a direct library call (e.g., \`render\`) when multiple examples use a project wrapper (e.g., \`renderWithTestHelpers\`)
|
|
1212
|
+
- Common utility functions available in the project are not being imported or used
|
|
1213
|
+
- The code deviates from patterns that appear in 3+ examples
|
|
1214
|
+
5. Pay special attention to imports - if most similar files import certain utilities, the reviewed file should too.
|
|
1215
|
+
|
|
1216
|
+
**STAGE 3: Historical Review Comments Analysis**
|
|
1217
|
+
1. **CRITICAL**: If 'CONTEXT C: HISTORICAL REVIEW COMMENTS' is present, analyze each historical comment:
|
|
1218
|
+
- Look for patterns in the types of issues human reviewers have identified in similar code
|
|
1219
|
+
- Identify if the SAME DEFINITE issue exists in the current file (not similar - the SAME)
|
|
1220
|
+
- Pay special attention to comments with high relevance scores (>70%)
|
|
1221
|
+
2. **Apply Historical Insights**: For each historical comment:
|
|
1222
|
+
- Only report if the EXACT same issue type exists with a SPECIFIC code fix
|
|
1223
|
+
- Do NOT report speculative issues based on historical patterns
|
|
1224
|
+
3. **Prioritize Historical Issues**: Issues DEFINITELY matching historical patterns get high priority
|
|
1225
|
+
|
|
1226
|
+
**STAGE 4: Consolidate, Prioritize, and Generate Output**
|
|
1227
|
+
1. **CRITICAL REMINDER**: If custom instructions were provided at the beginning of this prompt, they take ABSOLUTE PRECEDENCE over all other guidelines and must be followed strictly.
|
|
1228
|
+
2. Combine the potential issues identified in Stage 1 (Guideline-Based), Stage 2 (Example-Based), and Stage 3 (Historical Review Comments).
|
|
1229
|
+
3. **Apply Conflict Resolution AND Citation Rules:**
|
|
1230
|
+
* **Guideline Precedence:** If an issue identified in Stage 2 (from code examples) or Stage 3 (from historical comments) **contradicts** an explicit guideline from Stage 1, **discard the conflicting issue**. Guidelines always take precedence.
|
|
1231
|
+
* **Citation Priority:** When reporting an issue:
|
|
1232
|
+
* **CRITICAL FOR CUSTOM INSTRUCTIONS**: If the issue violates a custom instruction provided at the beginning of this prompt, you MUST include the source document name in both the description and suggestion. For example: "violates the coding standards specified in '[Document Name]'" or "as required by '[Document Name]'".
|
|
1233
|
+
* If the relevant convention or standard is defined in 'CONTEXT A: EXPLICIT GUIDELINES', cite the guideline document.
|
|
1234
|
+
* For implicit patterns discovered from code examples (like helper utilities, common practices), cite the specific code examples that demonstrate the pattern.
|
|
1235
|
+
* For issues identified from historical review comments, report them as standard code review findings without referencing the historical source.
|
|
1236
|
+
* **IMPORTANT**: When citing implicit patterns from Context B, be specific about which files demonstrate the pattern and what the pattern is.
|
|
1237
|
+
4. **Special attention to implicit patterns**: Issues related to not using project-specific utilities or helpers should be marked as high priority if the pattern appears consistently across multiple examples in Context B.
|
|
1238
|
+
5. **Special attention to historical patterns**: Issues DEFINITELY matching historical patterns get high priority.
|
|
1239
|
+
6. Assess for DEFINITE logic errors or bugs only - do NOT report speculative issues.
|
|
1240
|
+
7. **CRITICAL OUTPUT FILTER**: Before reporting ANY issue, ask yourself: "Do I have a SPECIFIC code fix?" If not, do NOT report it. Do NOT ask the developer to verify, ensure, or check anything.
|
|
1241
|
+
8. **CRITICAL 'lineNumbers' RULE - MANDATORY COMPLIANCE**:
|
|
1242
|
+
- **ALWAYS provide line numbers** - this field is REQUIRED for every issue
|
|
1243
|
+
- If you can identify specific lines, provide them (max 3-5 for repeated issues)
|
|
1244
|
+
- If the issue affects the entire file or cannot be pinpointed, provide [1] or relevant section line numbers
|
|
1245
|
+
- For ANY issue that occurs multiple times in a file, list ONLY the first 3-5 occurrences maximum
|
|
1246
|
+
- NEVER provide exhaustive lists of line numbers (e.g., [1,2,3,4,5,6,7,8,9,10...])
|
|
1247
|
+
- If an issue affects many lines, use representative examples only
|
|
1248
|
+
- Exhaustive line number lists are considered hallucination and must be avoided
|
|
1249
|
+
- Example: Instead of listing 20+ line numbers, use [15, 23, 47]
|
|
1250
|
+
- **NEVER omit lineNumbers** - empty arrays [] are not allowed
|
|
1251
|
+
9. Format the final, consolidated, and prioritized list of issues, along with a brief overall summary, **strictly** according to the JSON structure below.
|
|
1252
|
+
10. CRITICAL: Respond ONLY with valid JSON - start with { and end with }, no additional text.
|
|
1253
|
+
|
|
1254
|
+
${getFinalReminderBlock()}
|
|
1255
|
+
|
|
1256
|
+
${getCitationRequirementBlock()}
|
|
1257
|
+
|
|
1258
|
+
REQUIRED JSON OUTPUT FORMAT:
|
|
1259
|
+
|
|
1260
|
+
**REMINDER: lineNumbers is REQUIRED - always provide at least one line number. Use ONLY 3-5 representative line numbers for repeated issues. NEVER provide exhaustive lists or empty arrays.**
|
|
1261
|
+
|
|
1262
|
+
${getCodeSuggestionsFormatBlock()}
|
|
1263
|
+
|
|
1264
|
+
You must respond with EXACTLY this JSON structure, with no additional text:
|
|
1265
|
+
|
|
1266
|
+
{
|
|
1267
|
+
"summary": "Brief summary of the review, highlighting adherence to documented guidelines and consistency with code examples, plus any major issues found.",
|
|
1268
|
+
"issues": [
|
|
1269
|
+
{
|
|
1270
|
+
"type": "bug | improvement | convention | performance | security",
|
|
1271
|
+
"severity": "critical | high | medium",
|
|
1272
|
+
"description": "Description of the issue, clearly stating the deviation from the prioritized project pattern (guideline or example) OR the nature of the bug/improvement.",
|
|
1273
|
+
"lineNumbers": [42, 55, 61],
|
|
1274
|
+
"suggestion": "Concrete suggestion for fixing the issue or aligning with the prioritized inferred pattern. Ensure the suggestion is additive if adding missing functionality (like a hook) and doesn't wrongly suggest replacing existing, unrelated code.",
|
|
1275
|
+
"codeSuggestion": {
|
|
1276
|
+
"startLine": 42,
|
|
1277
|
+
"endLine": 44,
|
|
1278
|
+
"oldCode": " const result = data.map(item => item.value);",
|
|
1279
|
+
"newCode": " const result = data?.map(item => item?.value) ?? [];"
|
|
1280
|
+
}
|
|
1281
|
+
}
|
|
1282
|
+
]
|
|
1283
|
+
}
|
|
1284
|
+
`);
|
|
1285
|
+
}
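
// Illustrative sketch only (nothing below is used by this module): the minimal `file`
// shape that generateCodeAnalysisPrompt reads when building a diff-review prompt.
// The field names mirror the property accesses above; the concrete values are
// hypothetical examples.
const exampleDiffReviewFile = {
  path: 'src/components/ExampleList.jsx', // hypothetical path
  language: 'javascript',
  reviewType: 'DIFF REVIEW', // any other value produces the full-file review prompt
  diffInfo: { baseBranch: 'master', targetBranch: 'HEAD' },
  fullFileContent: 'export const ExampleList = () => null;\n', // complete file, used for context only
  content: '+export const ExampleList = () => null;', // the git diff that is actually critiqued
};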
|
|
1286
|
+
|
|
1287
|
+
/**
|
|
1288
|
+
* Generate test file analysis prompt for LLM
|
|
1289
|
+
*
|
|
1290
|
+
* @param {Object} context - Context for LLM
|
|
1291
|
+
* @returns {string} Test file analysis prompt
|
|
1292
|
+
*/
|
|
1293
|
+
function generateTestFileAnalysisPrompt(context) {
|
|
1294
|
+
const { file, codeExamples, guidelineSnippets, customDocs } = context;
|
|
1295
|
+
|
|
1296
|
+
// Format code examples and guidelines using shared helpers
|
|
1297
|
+
const formattedCodeExamples = formatCodeExamplesBlock(codeExamples, 'TEST EXAMPLE');
|
|
1298
|
+
const formattedGuidelines = formatGuidelinesBlock(guidelineSnippets, 'TESTING GUIDELINE');
|
|
1299
|
+
|
|
1300
|
+
// Detect if this is a diff review
|
|
1301
|
+
const isDiffReview = file.reviewType === 'DIFF REVIEW';
|
|
1302
|
+
const reviewInstructions = isDiffReview
|
|
1303
|
+
? 'Your task is to review the test file git diff by performing a comprehensive analysis focused on testing best practices and patterns. Follow the context awareness instructions provided with the file content below.'
|
|
1304
|
+
: 'Your task is to review the following test file by performing a comprehensive analysis focused on testing best practices and patterns.';
|
|
1305
|
+
|
|
1306
|
+
const fileSection = isDiffReview
|
|
1307
|
+
? `TEST FILE GIT DIFF TO REVIEW (FOCUS ONLY ON CHANGED LINES):
|
|
1308
|
+
Path: ${file.path}
|
|
1309
|
+
Language: ${file.language}
|
|
1310
|
+
Base Branch: ${file.diffInfo?.baseBranch || 'master'}
|
|
1311
|
+
Target Branch: ${file.diffInfo?.targetBranch || 'HEAD'}
|
|
1312
|
+
|
|
1313
|
+
**CRITICAL CONTEXT AWARENESS INSTRUCTIONS:**
|
|
1314
|
+
|
|
1315
|
+
You have access to TWO pieces of information:
|
|
1316
|
+
1. **FULL TEST FILE CONTENT** - The complete test file for understanding existing test coverage
|
|
1317
|
+
2. **GIT DIFF** - Only the test changes to review
|
|
1318
|
+
|
|
1319
|
+
**Review Rules:**
|
|
1320
|
+
- ONLY critique the CHANGED lines in the diff (lines with + or -)
|
|
1321
|
+
- USE the full file to verify existing test coverage before suggesting new tests
|
|
1322
|
+
- DO NOT suggest adding tests that already exist in the unchanged portions
|
|
1323
|
+
- DO NOT flag missing test coverage if tests exist elsewhere in the file
|
|
1324
|
+
- Check the full file for existing test cases before making assumptions
|
|
1325
|
+
- The unchanged test code is part of the file - review it before suggesting additions
|
|
1326
|
+
|
|
1327
|
+
**FULL TEST FILE CONTENT (for context - check for existing tests):**
|
|
1328
|
+
|
|
1329
|
+
\`\`\`${file.language}
|
|
1330
|
+
${file.fullFileContent || file.content}
|
|
1331
|
+
\`\`\`
|
|
1332
|
+
|
|
1333
|
+
**GIT DIFF TO REVIEW (critique ONLY these changes):**
|
|
1334
|
+
|
|
1335
|
+
\`\`\`diff
|
|
1336
|
+
${file.content}
|
|
1337
|
+
\`\`\``
|
|
1338
|
+
: `TEST FILE TO REVIEW:
|
|
1339
|
+
Path: ${file.path}
|
|
1340
|
+
Language: ${file.language}
|
|
1341
|
+
|
|
1342
|
+
\`\`\`${file.language}
|
|
1343
|
+
${file.content}
|
|
1344
|
+
\`\`\``;
|
|
1345
|
+
|
|
1346
|
+
// Use shared helpers for custom docs and role definition
|
|
1347
|
+
const customDocsSection = formatCustomDocsSection(customDocs);
|
|
1348
|
+
const roleDefinition = buildRoleDefinition(
|
|
1349
|
+
'You are an expert test code reviewer acting as a senior developer on this specific project.',
|
|
1350
|
+
customDocs,
|
|
1351
|
+
'test'
|
|
1352
|
+
);
|
|
1353
|
+
|
|
1354
|
+
// Add project architecture context if available
|
|
1355
|
+
let projectArchitectureSection = '';
|
|
1356
|
+
if (context.projectSummary) {
|
|
1357
|
+
projectArchitectureSection = formatProjectSummaryForLLM(context.projectSummary);
|
|
1358
|
+
}
|
|
1359
|
+
|
|
1360
|
+
// Test-specific prompt
|
|
1361
|
+
return finalizePrompt(`
|
|
1362
|
+
${roleDefinition}
|
|
1363
|
+
|
|
1364
|
+
${reviewInstructions}
|
|
1365
|
+
|
|
1366
|
+
${fileSection}
|
|
1367
|
+
|
|
1368
|
+
## ANALYSIS CONTEXT
|
|
1369
|
+
${customDocsSection}
|
|
1370
|
+
|
|
1371
|
+
CONTEXT FROM PROJECT:
|
|
1372
|
+
${projectArchitectureSection}
|
|
1373
|
+
|
|
1374
|
+
CONTEXT A: TESTING GUIDELINES AND BEST PRACTICES
|
|
1375
|
+
${formattedGuidelines}
|
|
1376
|
+
|
|
1377
|
+
CONTEXT B: SIMILAR TEST EXAMPLES FROM PROJECT
|
|
1378
|
+
${formattedCodeExamples}
|
|
1379
|
+
|
|
1380
|
+
INSTRUCTIONS:
|
|
1381
|
+
|
|
1382
|
+
${getCriticalRulesBlock({ importRuleContext: 'test' })}
|
|
1383
|
+
|
|
1384
|
+
**Perform the following test-specific analysis:**
|
|
1385
|
+
|
|
1386
|
+
**STAGE 1: Custom Instructions & Test Coverage Analysis**
|
|
1387
|
+
1. **FIRST AND MOST IMPORTANT**: If custom instructions were provided at the beginning of this prompt, analyze the test file against those custom instructions BEFORE all other analysis. Custom instructions always take precedence.
|
|
1388
|
+
2. Analyze test coverage - identify SPECIFIC missing test cases only if you can name the exact scenario that should be tested.
|
|
1389
|
+
3. Only report coverage gaps where you can provide a concrete test case to add.
|
|
1390
|
+
|
|
1391
|
+
**STAGE 2: Test Quality and Best Practices**
|
|
1392
|
+
1. Evaluate test naming conventions - report only DEFINITE violations where you can show the correct naming.
|
|
1393
|
+
2. Analyze test organization - report only if tests are clearly misorganized with a specific fix.
|
|
1394
|
+
3. Assess assertion quality - report only weak assertions where you can provide a stronger alternative.
|
|
1395
|
+
4. Review test isolation - report only if you find a DEFINITE side effect issue with a specific fix.
|
|
1396
|
+
|
|
1397
|
+
**STAGE 3: Testing Patterns and Conventions (CRITICAL)**
|
|
1398
|
+
1. **IMPORTANT**: Carefully analyze ALL code examples in Context B to identify:
|
|
1399
|
+
- Common helper functions or utilities that appear across multiple test files
|
|
1400
|
+
- Consistent patterns in how certain operations are performed (e.g., rendering, mocking, assertions)
|
|
1401
|
+
- Any project-specific abstractions or wrappers around standard testing libraries
|
|
1402
|
+
2. **CRITICAL**: Compare the reviewed test file against these discovered patterns. Flag ONLY instances where:
|
|
1403
|
+
- The test DEFINITELY uses a direct library call when a project wrapper exists (cite the wrapper)
|
|
1404
|
+
- A common utility is DEFINITELY available but not used (cite where it's defined)
|
|
1405
|
+
- The test CLEARLY deviates from a pattern shown in 3+ examples (cite the examples)
|
|
1406
|
+
3. Report mocking/stubbing issues only with a specific code fix.
|
|
1407
|
+
4. Report fixture issues only with a specific code fix showing the correct pattern.
|
|
1408
|
+
5. Report async handling issues only with specific code showing the correct approach.
|
|
1409
|
+
|
|
1410
|
+
**STAGE 4: Performance and Maintainability**
|
|
1411
|
+
1. Report slow tests only if you can identify the specific cause and fix.
|
|
1412
|
+
2. Report code duplication only with a specific refactoring suggestion.
|
|
1413
|
+
|
|
1414
|
+
**STAGE 5: Consolidate and Generate Output**
|
|
1415
|
+
1. **CRITICAL**: Prioritize issues where the test deviates from implicit project patterns shown in Context B (similar test examples), especially regarding test utilities and helper functions.
|
|
1416
|
+
2. Provide concrete suggestions that align with the project's testing patterns, referencing specific examples from Context B when applicable.
|
|
1417
|
+
3. Assess for any potential logic errors or bugs within the reviewed code itself, independent of conventions, and include them as separate issues.
|
|
1418
|
+
4. **CRITICAL 'lineNumbers' RULE - MANDATORY COMPLIANCE**:
|
|
1419
|
+
- For ANY issue that occurs multiple times in a test file, list ONLY the first 3-5 occurrences maximum
|
|
1420
|
+
- NEVER provide exhaustive lists of line numbers (e.g., [1,2,3,4,5,6,7,8,9,10...])
|
|
1421
|
+
- If an issue affects many lines, use representative examples only
|
|
1422
|
+
- Exhaustive line number lists are considered hallucination and must be avoided
|
|
1423
|
+
- Example: Instead of listing 20+ line numbers, use [15, 23, 47]
|
|
1424
|
+
5. Format the output according to the JSON structure below.
|
|
1425
|
+
|
|
1426
|
+
${getFinalReminderBlock()}
|
|
1427
|
+
|
|
1428
|
+
${getCitationRequirementBlock()}
|
|
1429
|
+
|
|
1430
|
+
REQUIRED JSON OUTPUT FORMAT:
|
|
1431
|
+
|
|
1432
|
+
**REMINDER: For lineNumbers array, use ONLY 3-5 representative line numbers for repeated issues. NEVER provide exhaustive lists.**
|
|
1433
|
+
|
|
1434
|
+
${getCodeSuggestionsFormatBlock()}
|
|
1435
|
+
|
|
1436
|
+
You must respond with EXACTLY this JSON structure, with no additional text:
|
|
1437
|
+
|
|
1438
|
+
{
|
|
1439
|
+
"summary": "Brief summary of the test file review, highlighting coverage completeness, adherence to testing best practices, and any critical issues found.",
|
|
1440
|
+
"issues": [
|
|
1441
|
+
{
|
|
1442
|
+
"type": "bug | improvement | convention | performance | coverage",
|
|
1443
|
+
"severity": "critical | high | medium",
|
|
1444
|
+
"description": "Description of the issue, clearly stating the problem with the test implementation or coverage gap.",
|
|
1445
|
+
"lineNumbers": [25, 38],
|
|
1446
|
+
"suggestion": "Concrete suggestion for improving the test, adding missing coverage, or following testing best practices.",
|
|
1447
|
+
"codeSuggestion": {
|
|
1448
|
+
"startLine": 25,
|
|
1449
|
+
"endLine": 27,
|
|
1450
|
+
"oldCode": " expect(result).toBe(true);",
|
|
1451
|
+
"newCode": " expect(result).toBe(true);\n expect(result).not.toBeNull();"
|
|
1452
|
+
}
|
|
1453
|
+
}
|
|
1454
|
+
]
|
|
1455
|
+
}
|
|
1456
|
+
`);
|
|
1457
|
+
}
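
// Minimal sketch (assumed helper, not called anywhere in this module): one way a caller
// could sanity-check the single-file JSON shape that the two prompt builders above
// request - a summary string plus an issues array whose entries always carry a
// non-empty lineNumbers list and a severity from the documented enum.
function looksLikeSingleFileReview(parsed) {
  if (!parsed || typeof parsed.summary !== 'string' || !Array.isArray(parsed.issues)) {
    return false;
  }
  const allowedSeverities = new Set(['critical', 'high', 'medium']);
  return parsed.issues.every(
    (issue) =>
      typeof issue.description === 'string' &&
      allowedSeverities.has(issue.severity) &&
      Array.isArray(issue.lineNumbers) &&
      issue.lineNumbers.length > 0
  );
}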
|
|
1458
|
+
|
|
1459
|
+
/**
|
|
1460
|
+
* Generate holistic PR analysis prompt for LLM
|
|
1461
|
+
*
|
|
1462
|
+
* @param {Object} context - Holistic context for LLM
|
|
1463
|
+
* @returns {string} Holistic PR analysis prompt
|
|
1464
|
+
*/
|
|
1465
|
+
function generateHolisticPRAnalysisPrompt(context) {
|
|
1466
|
+
const { file, context: contextSections, customDocs } = context;
|
|
1467
|
+
|
|
1468
|
+
// Format unified context sections
|
|
1469
|
+
const formattedCodeExamples =
|
|
1470
|
+
contextSections
|
|
1471
|
+
.find((s) => s.title === 'Similar Code Examples')
|
|
1472
|
+
?.items?.slice(0, 10)
|
|
1473
|
+
.map((ex, idx) => {
|
|
1474
|
+
return `
|
|
1475
|
+
CODE EXAMPLE ${idx + 1} (Similarity: ${ex.similarity?.toFixed(3) || 'N/A'})
|
|
1476
|
+
Path: ${ex.path}
|
|
1477
|
+
Language: ${ex.language}
|
|
1478
|
+
|
|
1479
|
+
\`\`\`${ex.language || ''}
|
|
1480
|
+
${ex.content}
|
|
1481
|
+
\`\`\`
|
|
1482
|
+
`;
|
|
1483
|
+
})
|
|
1484
|
+
.join('\n') || 'No relevant code examples found.';
|
|
1485
|
+
|
|
1486
|
+
const formattedGuidelines =
|
|
1487
|
+
contextSections
|
|
1488
|
+
.find((s) => s.title === 'Project Guidelines')
|
|
1489
|
+
?.items?.slice(0, 8)
|
|
1490
|
+
.map((g, idx) => {
|
|
1491
|
+
return `
|
|
1492
|
+
GUIDELINE ${idx + 1} (Source: ${g.path})
|
|
1493
|
+
${g.headingText ? `Heading: "${g.headingText}"` : ''}
|
|
1494
|
+
|
|
1495
|
+
\`\`\`
|
|
1496
|
+
${g.content}
|
|
1497
|
+
\`\`\`
|
|
1498
|
+
`;
|
|
1499
|
+
})
|
|
1500
|
+
.join('\n') || 'No specific guidelines found.';
|
|
1501
|
+
|
|
1502
|
+
const formattedPRComments =
|
|
1503
|
+
contextSections
|
|
1504
|
+
.find((s) => s.title === 'Historical Review Comments')
|
|
1505
|
+
?.items?.slice(0, MAX_PR_COMMENTS_FOR_CONTEXT)
|
|
1506
|
+
.map((comment, idx) => {
|
|
1507
|
+
return `### Historical Comment ${idx + 1}
|
|
1508
|
+
- **PR**: #${comment.prNumber} by ${comment.author}
|
|
1509
|
+
- **File**: ${comment.filePath}
|
|
1510
|
+
- **Type**: ${comment.commentType || 'review'}
|
|
1511
|
+
- **Relevance**: ${(comment.relevanceScore * 100).toFixed(1)}%
|
|
1512
|
+
- **Review**: ${comment.body}
|
|
1513
|
+
|
|
1514
|
+
`;
|
|
1515
|
+
})
|
|
1516
|
+
.join('\n') || 'No historical PR comments found.';
|
|
1517
|
+
|
|
1518
|
+
// Format PR files with their diffs
|
|
1519
|
+
const prFiles = file.prFiles || [];
|
|
1520
|
+
const formattedPRFiles = prFiles
|
|
1521
|
+
.map((prFile, idx) => {
|
|
1522
|
+
return `
|
|
1523
|
+
## FILE ${idx + 1}: ${prFile.path}
|
|
1524
|
+
**Language**: ${prFile.language}
|
|
1525
|
+
**Type**: ${prFile.isTest ? 'Test' : 'Source'} file
|
|
1526
|
+
**Summary**: ${prFile.summary}
|
|
1527
|
+
|
|
1528
|
+
### Changes (Git Diff):
|
|
1529
|
+
\`\`\`diff
|
|
1530
|
+
${prFile.diff}
|
|
1531
|
+
\`\`\`
|
|
1532
|
+
|
|
1533
|
+
### Full File Content (For Context):
|
|
1534
|
+
\`\`\`${prFile.language}
|
|
1535
|
+
${prFile.fullContent}
|
|
1536
|
+
\`\`\`
|
|
1537
|
+
`;
|
|
1538
|
+
})
|
|
1539
|
+
.join('\n');
|
|
1540
|
+
|
|
1541
|
+
// Use shared helper for custom docs section
|
|
1542
|
+
const customDocsSection = formatCustomDocsSection(customDocs);
|
|
1543
|
+
|
|
1544
|
+
// Build the role definition - PR analysis has additional context awareness instructions
|
|
1545
|
+
const baseRole = `You are an expert code reviewer performing a holistic review of a Pull Request with ${prFiles.length} files.
|
|
1546
|
+
|
|
1547
|
+
**CRITICAL CONTEXT AWARENESS INSTRUCTIONS:**
|
|
1548
|
+
|
|
1549
|
+
For each file in this PR, you have access to:
|
|
1550
|
+
1. **FULL FILE CONTENT** - The complete file for understanding context and existing code
|
|
1551
|
+
2. **GIT DIFF** - Only the changes to review
|
|
1552
|
+
|
|
1553
|
+
**Review Rules:**
|
|
1554
|
+
- ONLY critique the CHANGED lines shown in each file's diff (lines with + or -)
|
|
1555
|
+
- USE the full file content to understand context, dependencies, and existing implementations
|
|
1556
|
+
- DO NOT suggest adding code that already exists in the unchanged portions of any file
|
|
1557
|
+
- DO NOT flag issues about missing code if it exists elsewhere in the full file
|
|
1558
|
+
- Before flagging cross-file issues, verify the code doesn't already exist in unchanged portions
|
|
1559
|
+
- Do NOT flag functions/variables as missing if they exist elsewhere in the full file
|
|
1560
|
+
- The unchanged code is part of each file - always check it before making assumptions`;
|
|
1561
|
+
|
|
1562
|
+
let roleDefinition = buildRoleDefinition(baseRole, customDocs, 'pr');
|
|
1563
|
+
roleDefinition += '\nAnalyze ALL files together to identify cross-file issues, consistency problems, and overall code quality.';
|
|
1564
|
+
|
|
1565
|
+
// Add project architecture context if available
|
|
1566
|
+
let projectArchitectureSection = '';
|
|
1567
|
+
if (context.projectSummary) {
|
|
1568
|
+
projectArchitectureSection = formatProjectSummaryForLLM(context.projectSummary);
|
|
1569
|
+
}
|
|
1570
|
+
|
|
1571
|
+
return finalizePrompt(`
|
|
1572
|
+
${roleDefinition}
|
|
1573
|
+
|
|
1574
|
+
## PULL REQUEST OVERVIEW
|
|
1575
|
+
- **Total Files**: ${prFiles.length}
|
|
1576
|
+
- **Source Files**: ${prFiles.filter((f) => !f.isTest).length}
|
|
1577
|
+
- **Test Files**: ${prFiles.filter((f) => f.isTest).length}
|
|
1578
|
+
|
|
1579
|
+
## UNIFIED CONTEXT FROM PROJECT
|
|
1580
|
+
${projectArchitectureSection}
|
|
1581
|
+
|
|
1582
|
+
### PROJECT CODE EXAMPLES
|
|
1583
|
+
${formattedCodeExamples}
|
|
1584
|
+
|
|
1585
|
+
### PROJECT GUIDELINES
|
|
1586
|
+
${formattedGuidelines}
|
|
1587
|
+
|
|
1588
|
+
### HISTORICAL REVIEW COMMENTS
|
|
1589
|
+
${formattedPRComments}
|
|
1590
|
+
|
|
1591
|
+
## PR FILES WITH CHANGES
|
|
1592
|
+
${formattedPRFiles}
|
|
1593
|
+
|
|
1594
|
+
## ANALYSIS CONTEXT
|
|
1595
|
+
${customDocsSection}
|
|
1596
|
+
|
|
1597
|
+
## ANALYSIS INSTRUCTIONS
|
|
1598
|
+
|
|
1599
|
+
${getCriticalRulesBlock({ importRuleContext: 'pr' })}
|
|
1600
|
+
|
|
1601
|
+
**Perform the following holistic analysis stages sequentially for all PR files:**
|
|
1602
|
+
|
|
1603
|
+
### **STAGE 1: Project Pattern Analysis (CRITICAL FOR CONSISTENCY)**
|
|
1604
|
+
|
|
1605
|
+
1. **CRITICAL FIRST STEP**: Scan ALL code examples in PROJECT CODE EXAMPLES and create a comprehensive list of:
|
|
1606
|
+
- Common import statements (especially those containing 'helper', 'util', 'shared', 'common', 'test')
|
|
1607
|
+
- Frequently used function calls that appear across multiple examples
|
|
1608
|
+
- Project-specific wrappers or utilities (e.g., \`renderWithTestHelpers\` instead of direct \`render\`)
|
|
1609
|
+
- Consistent patterns in how operations are performed
|
|
1610
|
+
- Testing patterns and helper functions
|
|
1611
|
+
- Component patterns and architectural approaches
|
|
1612
|
+
|
|
1613
|
+
2. **IMPORTANT**: For each common utility or pattern you identify, note:
|
|
1614
|
+
- Which example files demonstrate it (cite specific examples)
|
|
1615
|
+
- What the pattern appears to do
|
|
1616
|
+
- Whether ALL PR files are using this pattern consistently
|
|
1617
|
+
|
|
1618
|
+
3. **HIGH PRIORITY CROSS-FILE CHECKS**: Flag any instances where:
|
|
1619
|
+
- Files use direct library calls when multiple examples use project wrappers
|
|
1620
|
+
- Common utility functions available in the project are not being imported/used consistently
|
|
1621
|
+
- Files deviate from patterns that appear in 3+ examples
|
|
1622
|
+
- Test files don't follow established test helper patterns
|
|
1623
|
+
- Import statements are inconsistent across similar files
|
|
1624
|
+
|
|
1625
|
+
### **STAGE 2: Custom Instructions & Guideline Compliance Analysis**
|
|
1626
|
+
|
|
1627
|
+
1. **FIRST AND MOST IMPORTANT**: If custom instructions were provided at the beginning of this prompt, analyze ALL PR files against those custom instructions BEFORE all other analysis. Custom instructions always take precedence.
|
|
1628
|
+
2. Analyze ALL PR files strictly against the standards, rules, and explanations in PROJECT GUIDELINES
|
|
1629
|
+
3. Identify specific deviations where any file violates custom instructions OR explicit guidelines. Note the source for each deviation found.
|
|
1630
|
+
4. Check for consistency of guideline application across all files
|
|
1631
|
+
5. Ensure architectural decisions are consistent across the PR
|
|
1632
|
+
|
|
1633
|
+
### **STAGE 3: Historical Pattern Recognition**
|
|
1634
|
+
|
|
1635
|
+
1. **CRITICAL**: Analyze HISTORICAL REVIEW COMMENTS to identify patterns:
|
|
1636
|
+
- Types of issues human reviewers frequently flag in similar code
|
|
1637
|
+
- Recurring themes across multiple historical comments
|
|
1638
|
+
- High-relevance issues (>70% relevance score) that apply to current PR
|
|
1639
|
+
|
|
1640
|
+
2. **Apply Historical Insights to Each File**:
|
|
1641
|
+
- Identify DEFINITE issues that match historical patterns across PR files
|
|
1642
|
+
- Apply reviewer suggestions that are relevant to current changes
|
|
1643
|
+
- Look for patterns that span multiple files in the PR
|
|
1644
|
+
|
|
1645
|
+
### **STAGE 4: Cross-File Integration Analysis**
|
|
1646
|
+
|
|
1647
|
+
1. **Naming and Import Consistency**:
|
|
1648
|
+
- Report naming inconsistencies only with specific examples and fixes
|
|
1649
|
+
- Report import/export issues only with specific missing/incorrect imports identified
|
|
1650
|
+
- Report duplicated logic only with specific refactoring suggestions
|
|
1651
|
+
|
|
1652
|
+
2. **Test Coverage and Quality**:
|
|
1653
|
+
- Report missing tests only if you can specify EXACTLY which test case should be added
|
|
1654
|
+
- Report test pattern deviations only with specific code fixes
|
|
1655
|
+
- Do NOT suggest "adding tests" without specifying the exact test
|
|
1656
|
+
|
|
1657
|
+
3. **Architectural Integration**:
|
|
1658
|
+
- Report breaking changes only if you can identify the SPECIFIC break
|
|
1659
|
+
- Report API inconsistencies only with SPECIFIC mismatches identified
|
|
1660
|
+
- Report separation of concerns issues only with SPECIFIC refactoring suggestions
|
|
1661
|
+
|
|
1662
|
+
### **STAGE 5: Consolidate and Prioritize Issues**
|
|
1663
|
+
|
|
1664
|
+
1. **Apply Conflict Resolution Rules**:
|
|
1665
|
+
- **Guideline Precedence**: If pattern-based or historical insights contradict explicit guidelines, guidelines take precedence
|
|
1666
|
+
- **Cross-File Priority**: Issues affecting multiple files get higher priority
|
|
1667
|
+
- **Pattern Consistency**: Missing project-specific utilities/helpers are high priority if pattern appears in 3+ examples
|
|
1668
|
+
|
|
1669
|
+
2. **Citation Rules**:
|
|
1670
|
+
- For guideline violations: cite the specific guideline document
|
|
1671
|
+
- For pattern deviations: cite specific code examples that demonstrate the correct pattern
|
|
1672
|
+
- For historical issues: report as standard findings without referencing historical source
|
|
1673
|
+
- For cross-file issues: specify all affected files
|
|
1674
|
+
|
|
1675
|
+
3. **CRITICAL OUTPUT FILTER - Apply before reporting ANY issue**:
|
|
1676
|
+
- **Only report issues where you have a DEFINITE problem AND a SPECIFIC code fix**
|
|
1677
|
+
- **Do NOT report issues that require the developer to "verify", "ensure", or "check" something**
|
|
1678
|
+
- **Do NOT report issues where you are uncertain** - if you find yourself writing "may", "might", "could", or "consider", do not report it
|
|
1679
|
+
- **Do NOT suggest adding comments or documentation**
|
|
1680
|
+
|
|
1681
|
+
4. Assess for DEFINITE logic errors or bugs only - do not report speculative issues.
|
|
1682
|
+
5. DO NOT check whether any file referenced in an import statement is missing.
|
|
1683
|
+
6. **CRITICAL 'lineNumbers' RULE - MANDATORY COMPLIANCE**:
|
|
1684
|
+
- For ANY issue that occurs multiple times in a file, list ONLY the first 3-5 occurrences maximum
|
|
1685
|
+
- NEVER provide exhaustive lists of line numbers (e.g., [1,2,3,4,5,6,7,8,9,10...])
|
|
1686
|
+
- If an issue affects many lines, use representative examples only
|
|
1687
|
+
- Exhaustive line number lists are considered hallucination and must be avoided
|
|
1688
|
+
- Example: Instead of listing 20+ line numbers, use [15, 23, 47]
|
|
1689
|
+
|
|
1690
|
+
${getFinalReminderBlock()}
|
|
1691
|
+
|
|
1692
|
+
${getCitationRequirementBlock()}
|
|
1693
|
+
|
|
1694
|
+
REQUIRED JSON OUTPUT FORMAT:
|
|
1695
|
+
|
|
1696
|
+
**REMINDER: For lineNumbers array, use ONLY 3-5 representative line numbers for repeated issues. NEVER provide exhaustive lists.**
|
|
1697
|
+
|
|
1698
|
+
${getCodeSuggestionsFormatBlock()}
|
|
1699
|
+
|
|
1700
|
+
You must respond with EXACTLY this JSON structure, with no additional text:
|
|
1701
|
+
|
|
1702
|
+
{
|
|
1703
|
+
"summary": "Brief, high-level summary of the entire PR review...",
|
|
1704
|
+
"crossFileIssues": [
|
|
1705
|
+
{
|
|
1706
|
+
"type": "bug | improvement | convention | architecture",
|
|
1707
|
+
"severity": "critical | high | medium",
|
|
1708
|
+
"description": "Detailed description of an issue that spans multiple files...",
|
|
1709
|
+
"suggestion": "Actionable suggestion to resolve the cross-file issue.",
|
|
1710
|
+
"filesInvolved": ["path/to/file1.js", "path/to/file2.ts"]
|
|
1711
|
+
}
|
|
1712
|
+
],
|
|
1713
|
+
"fileSpecificIssues": {
|
|
1714
|
+
"path/to/file1.js": [
|
|
1715
|
+
{
|
|
1716
|
+
"type": "bug | improvement | convention | performance | security",
|
|
1717
|
+
"severity": "critical | high | medium",
|
|
1718
|
+
"description": "Description of the issue specific to this file.",
|
|
1719
|
+
"lineNumbers": [10, 15],
|
|
1720
|
+
"suggestion": "Concrete suggestion for fixing the issue in this file.",
|
|
1721
|
+
"codeSuggestion": {
|
|
1722
|
+
"startLine": 10,
|
|
1723
|
+
"endLine": 15,
|
|
1724
|
+
"oldCode": " const result = data.map(item => item.value);",
|
|
1725
|
+
"newCode": " const result = data?.map(item => item?.value) ?? [];"
|
|
1726
|
+
}
|
|
1727
|
+
}
|
|
1728
|
+
]
|
|
1729
|
+
},
|
|
1730
|
+
"recommendations": [
|
|
1731
|
+
{
|
|
1732
|
+
"type": "refactoring | testing | documentation",
|
|
1733
|
+
"description": "A high-level recommendation for improving the codebase...",
|
|
1734
|
+
"filesInvolved": ["path/to/relevant/file.js"]
|
|
1735
|
+
}
|
|
1736
|
+
]
|
|
1737
|
+
}
|
|
1738
|
+
`);
|
|
1739
|
+
}
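
// Illustrative only: the shape of the `context` argument that
// generateHolisticPRAnalysisPrompt expects. The section titles must match the ones
// looked up above ('Similar Code Examples', 'Project Guidelines', 'Historical Review
// Comments'); item fields are limited to the ones the formatters read, and every value
// shown is a hypothetical example.
const exampleHolisticPromptContext = {
  file: { prFiles: [], reviewType: 'PR HOLISTIC REVIEW' },
  customDocs: [],
  context: [
    {
      title: 'Similar Code Examples',
      items: [{ path: 'src/example.js', language: 'javascript', similarity: 0.82, content: 'const x = 1;' }],
    },
    {
      title: 'Project Guidelines',
      items: [{ path: 'docs/guide.md', headingText: 'Naming', content: 'Use camelCase for variables.' }],
    },
    {
      title: 'Historical Review Comments',
      items: [
        {
          prNumber: 42, // hypothetical PR number
          author: 'example-reviewer',
          filePath: 'src/example.js',
          commentType: 'review',
          relevanceScore: 0.73,
          body: 'Consider guarding against empty input here.',
        },
      ],
    },
  ],
};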
|
|
1740
|
+
|
|
1741
|
+
/**
|
|
1742
|
+
* Parse LLM analysis response
|
|
1743
|
+
*
|
|
1744
|
+
* @param {Object} rawResponse - Full LLM response object (with structured JSON from tool calling)
|
|
1745
|
+
* @returns {Object} Parsed analysis response
|
|
1746
|
+
*/
|
|
1747
|
+
function parseAnalysisResponse(rawResponse) {
|
|
1748
|
+
// rawResponse is now the full LLM response object with structured JSON from tool calling
|
|
1749
|
+
const parsedResponse = rawResponse.json;
|
|
1750
|
+
|
|
1751
|
+
if (!parsedResponse) {
|
|
1752
|
+
return {
|
|
1753
|
+
summary: 'Error parsing LLM response',
|
|
1754
|
+
issues: [],
|
|
1755
|
+
crossFileIssues: [],
|
|
1756
|
+
fileSpecificIssues: {},
|
|
1757
|
+
recommendations: [],
|
|
1758
|
+
rawResponse,
|
|
1759
|
+
parseError: 'Failed to parse JSON from LLM response',
|
|
1760
|
+
};
|
|
1761
|
+
}
|
|
1762
|
+
|
|
1763
|
+
// Check for holistic review structure, which contains fileSpecificIssues
|
|
1764
|
+
if (parsedResponse.fileSpecificIssues || parsedResponse.crossFileIssues || parsedResponse.recommendations) {
|
|
1765
|
+
return {
|
|
1766
|
+
summary: parsedResponse.summary || 'No summary provided',
|
|
1767
|
+
crossFileIssues: parsedResponse.crossFileIssues || [],
|
|
1768
|
+
fileSpecificIssues: parsedResponse.fileSpecificIssues || {},
|
|
1769
|
+
recommendations: parsedResponse.recommendations || [],
|
|
1770
|
+
rawResponse,
|
|
1771
|
+
};
|
|
1772
|
+
}
|
|
1773
|
+
|
|
1774
|
+
// Fallback to single-file review structure
|
|
1775
|
+
return {
|
|
1776
|
+
summary: parsedResponse.summary || 'No summary provided',
|
|
1777
|
+
issues: parsedResponse.issues || [],
|
|
1778
|
+
rawResponse,
|
|
1779
|
+
};
|
|
1780
|
+
}
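
// Minimal usage sketch (the helper name is hypothetical): parseAnalysisResponse is
// handed the full LLM response object - its `.json` property carries the structured
// output - and the caller branches on which of the two supported shapes came back.
function summarizeParsedReview(llmResponse) {
  const parsed = parseAnalysisResponse(llmResponse);
  if (parsed.parseError) {
    return `Parse failed: ${parsed.parseError}`;
  }
  if (parsed.fileSpecificIssues) {
    const fileCount = Object.keys(parsed.fileSpecificIssues).length;
    return `Holistic review: ${parsed.crossFileIssues.length} cross-file issues across ${fileCount} files`;
  }
  return `Single-file review: ${parsed.issues.length} issues`;
}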
|
|
1781
|
+
|
|
1782
|
+
/**
|
|
1783
|
+
* Get PR comment context for historical analysis integration
|
|
1784
|
+
*
|
|
1785
|
+
* @param {string} filePath - Path to the file being analyzed
|
|
1786
|
+
* @param {Object} options - Options for context retrieval
|
|
1787
|
+
* @returns {Promise<Object>} Historical PR comment context
|
|
1788
|
+
*/
|
|
1789
|
+
async function getPRCommentContext(filePath, options = {}) {
|
|
1790
|
+
try {
|
|
1791
|
+
const { maxComments = 20, similarityThreshold = 0.15, projectPath = process.cwd(), precomputedQueryEmbedding = null } = options;
|
|
1792
|
+
|
|
1793
|
+
// Normalize file path for comparison
|
|
1794
|
+
const normalizedPath = path.normalize(filePath);
|
|
1795
|
+
const fileName = path.basename(normalizedPath);
|
|
1796
|
+
|
|
1797
|
+
debug(`[getPRCommentContext] Getting context for ${normalizedPath}`);
|
|
1798
|
+
|
|
1799
|
+
// Use pre-computed embedding if available, otherwise compute it
|
|
1800
|
+
let fileContent = '';
|
|
1801
|
+
let contentForSearch = '';
|
|
1802
|
+
|
|
1803
|
+
if (precomputedQueryEmbedding) {
|
|
1804
|
+
console.log(chalk.blue(`🔍 Using pre-computed query embedding for PR comment search`));
|
|
1805
|
+
// We still need the file content for the search function, but not for embedding
|
|
1806
|
+
try {
|
|
1807
|
+
fileContent = fs.readFileSync(filePath, 'utf8');
|
|
1808
|
+
const maxEmbeddingLength = 8000; // Keep consistent with original truncation
|
|
1809
|
+
contentForSearch = fileContent.length > maxEmbeddingLength ? fileContent.substring(0, maxEmbeddingLength) : fileContent;
|
|
1810
|
+
} catch (readError) {
|
|
1811
|
+
debug(`[getPRCommentContext] Could not read file ${filePath}: ${readError.message}`);
|
|
1812
|
+
return {
|
|
1813
|
+
success: false,
|
|
1814
|
+
hasContext: false,
|
|
1815
|
+
error: `Could not read file: ${readError.message}`,
|
|
1816
|
+
comments: [],
|
|
1817
|
+
summary: 'Failed to read file for context analysis',
|
|
1818
|
+
};
|
|
1819
|
+
}
|
|
1820
|
+
} else {
|
|
1821
|
+
// Fallback to original behavior if no pre-computed embedding provided
|
|
1822
|
+
try {
|
|
1823
|
+
fileContent = fs.readFileSync(filePath, 'utf8');
|
|
1824
|
+
} catch (readError) {
|
|
1825
|
+
debug(`[getPRCommentContext] Could not read file ${filePath}: ${readError.message}`);
|
|
1826
|
+
return {
|
|
1827
|
+
success: false,
|
|
1828
|
+
hasContext: false,
|
|
1829
|
+
error: `Could not read file: ${readError.message}`,
|
|
1830
|
+
comments: [],
|
|
1831
|
+
summary: 'Failed to read file for context analysis',
|
|
1832
|
+
};
|
|
1833
|
+
}
|
|
1834
|
+
|
|
1835
|
+
// Truncate content for embedding if too long
|
|
1836
|
+
const maxEmbeddingLength = 8000; // Reasonable limit for embedding
|
|
1837
|
+
contentForSearch = fileContent.length > maxEmbeddingLength ? fileContent.substring(0, maxEmbeddingLength) : fileContent;
|
|
1838
|
+
}
|
|
1839
|
+
|
|
1840
|
+
// Detect if this is a test file using existing utility
|
|
1841
|
+
const isTest = isTestFile(filePath);
|
|
1842
|
+
|
|
1843
|
+
// Use semantic search to find similar PR comments
|
|
1844
|
+
let relevantComments = [];
|
|
1845
|
+
|
|
1846
|
+
console.log(chalk.blue(`🔍 Searching for PR comments with:`));
|
|
1847
|
+
|
|
1848
|
+
console.log(chalk.gray(` Project Path: ${projectPath}`));
|
|
1849
|
+
console.log(chalk.gray(` File: ${fileName}`));
|
|
1850
|
+
console.log(chalk.gray(` Similarity Threshold: ${similarityThreshold}`));
|
|
1851
|
+
console.log(chalk.gray(` Content Length: ${contentForSearch.length} chars`));
|
|
1852
|
+
console.log(chalk.gray(` Using Pre-computed Embedding: ${precomputedQueryEmbedding ? 'Yes' : 'No'}`));
|
|
1853
|
+
|
|
1854
|
+
try {
|
|
1855
|
+
console.log(chalk.blue(`🔍 Attempting hybrid search with chunking...`));
|
|
1856
|
+
relevantComments = await findRelevantPRComments(contentForSearch, {
|
|
1857
|
+
projectPath,
|
|
1858
|
+
limit: maxComments,
|
|
1859
|
+
isTestFile: isTest, // Pass test file context for filtering
|
|
1860
|
+
precomputedQueryEmbedding: precomputedQueryEmbedding, // Pass pre-computed embedding if available
|
|
1861
|
+
});
|
|
1862
|
+
console.log(chalk.green(`✅ Hybrid search returned ${relevantComments.length} comments`));
|
|
1863
|
+
if (relevantComments.length > 0) {
|
|
1864
|
+
console.log(chalk.blue(`Top comment similarities:`));
|
|
1865
|
+
relevantComments.slice(0, 3).forEach((comment, idx) => {
|
|
1866
|
+
console.log(
|
|
1867
|
+
chalk.gray(` ${idx + 1}. Score: ${comment.similarity_score?.toFixed(3)} - ${comment.comment_text?.substring(0, 80)}...`)
|
|
1868
|
+
);
|
|
1869
|
+
});
|
|
1870
|
+
}
|
|
1871
|
+
} catch (dbError) {
|
|
1872
|
+
console.log(chalk.yellow(`⚠️ Hybrid search failed: ${dbError.message}`));
|
|
1873
|
+
debug(`[getPRCommentContext] Hybrid search failed: ${dbError.message}`);
|
|
1874
|
+
// No fallback needed - if hybrid search fails, we just return empty results
|
|
1875
|
+
relevantComments = [];
|
|
1876
|
+
}
|
|
1877
|
+
|
|
1878
|
+
console.log('Total relevant comments:', relevantComments.length);
|
|
1879
|
+
|
|
1880
|
+
// Extract patterns and insights
|
|
1881
|
+
const patterns = extractCommentPatterns(relevantComments);
|
|
1882
|
+
const summary = generateContextSummary(relevantComments, patterns);
|
|
1883
|
+
|
|
1884
|
+
debug(`[getPRCommentContext] Found ${relevantComments.length} relevant comments for ${normalizedPath}`);
|
|
1885
|
+
|
|
1886
|
+
return {
|
|
1887
|
+
success: true,
|
|
1888
|
+
hasContext: relevantComments.length > 0,
|
|
1889
|
+
filePath: normalizedPath,
|
|
1890
|
+
comments: relevantComments.map(formatCommentForContext),
|
|
1891
|
+
patterns,
|
|
1892
|
+
summary,
|
|
1893
|
+
metadata: {
|
|
1894
|
+
totalCommentsFound: relevantComments.length,
|
|
1895
|
+
relevantCommentsReturned: relevantComments.length,
|
|
1896
|
+
averageRelevanceScore:
|
|
1897
|
+
relevantComments.length > 0 ? relevantComments.reduce((sum, c) => sum + c.similarity_score, 0) / relevantComments.length : 0,
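// NOTE (assumption): a similarity_score of exactly 0.5 is treated as the sentinel that
// findRelevantPRComments assigns to file-path-based fallback matches; any other score
// is taken to come from semantic embedding search.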
|
|
1898
|
+
searchMethod:
|
|
1899
|
+
relevantComments.length > 0 && relevantComments[0].similarity_score !== 0.5 ? 'semantic_embedding' : 'file_path_fallback',
|
|
1900
|
+
},
|
|
1901
|
+
};
|
|
1902
|
+
} catch (error) {
|
|
1903
|
+
debug(`[getPRCommentContext] Error getting PR comment context: ${error.message}`);
|
|
1904
|
+
return {
|
|
1905
|
+
success: false,
|
|
1906
|
+
hasContext: false,
|
|
1907
|
+
error: error.message,
|
|
1908
|
+
comments: [],
|
|
1909
|
+
summary: 'Failed to retrieve historical context',
|
|
1910
|
+
};
|
|
1911
|
+
}
|
|
1912
|
+
}
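
// Usage sketch (hypothetical call site): the options shown are among those the
// function destructures above, and the returned object exposes hasContext, comments,
// patterns, summary and metadata whether or not the lookup succeeded.
async function logHistoricalContextForFile(filePath) {
  const prContext = await getPRCommentContext(filePath, {
    maxComments: 10,
    projectPath: process.cwd(),
  });
  if (prContext.hasContext) {
    console.log(`${prContext.comments.length} historical comments found: ${prContext.summary}`);
  } else {
    console.log(prContext.error || 'No relevant historical PR comments found.');
  }
}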
|
|
1913
|
+
|
|
1914
|
+
/**
|
|
1915
|
+
* Extract patterns from historical comments
|
|
1916
|
+
*/
|
|
1917
|
+
function extractCommentPatterns(comments) {
|
|
1918
|
+
const patterns = {
|
|
1919
|
+
commonIssues: [],
|
|
1920
|
+
reviewPatterns: [],
|
|
1921
|
+
technicalConcerns: [],
|
|
1922
|
+
suggestedImprovements: [],
|
|
1923
|
+
};
|
|
1924
|
+
|
|
1925
|
+
// Analyze comment content for patterns
|
|
1926
|
+
const allText = comments
|
|
1927
|
+
.map((c) => c.comment_text || c.body || '') // handle both raw (comment_text) and formatted (body) field names
|
|
1928
|
+
.join(' ')
|
|
1929
|
+
.toLowerCase();
|
|
1930
|
+
|
|
1931
|
+
// Common issue keywords
|
|
1932
|
+
const issueKeywords = ['bug', 'error', 'issue', 'problem', 'broken', 'fail'];
|
|
1933
|
+
patterns.commonIssues = issueKeywords.filter((keyword) => allText.includes(keyword));
|
|
1934
|
+
|
|
1935
|
+
// Review pattern keywords
|
|
1936
|
+
const reviewKeywords = ['suggest', 'recommend', 'consider', 'improve', 'better'];
|
|
1937
|
+
patterns.reviewPatterns = reviewKeywords.filter((keyword) => allText.includes(keyword));
|
|
1938
|
+
|
|
1939
|
+
// Technical concern keywords
|
|
1940
|
+
const techKeywords = ['performance', 'security', 'memory', 'optimization', 'scalability'];
|
|
1941
|
+
patterns.technicalConcerns = techKeywords.filter((keyword) => allText.includes(keyword));
|
|
1942
|
+
|
|
1943
|
+
return patterns;
|
|
1944
|
+
}
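
// Illustrative only (hypothetical comment bodies): a worked example of the keyword
// scan above. Note that suggestedImprovements is declared but never populated by the
// current implementation.
const examplePatterns = extractCommentPatterns([
  { body: 'This will fail when the list is empty - looks like a bug.' },
  { body: 'Suggest caching this value for performance.' },
]);
// examplePatterns => { commonIssues: ['bug', 'fail'], reviewPatterns: ['suggest'],
//                      technicalConcerns: ['performance'], suggestedImprovements: [] }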
|
|
1945
|
+
|
|
1946
|
+
/**
|
|
1947
|
+
* Generate summary of historical context
|
|
1948
|
+
*/
|
|
1949
|
+
function generateContextSummary(comments, patterns) {
|
|
1950
|
+
if (comments.length === 0) {
|
|
1951
|
+
return 'No relevant historical comments found for this file.';
|
|
1952
|
+
}
|
|
1953
|
+
|
|
1954
|
+
const summaryParts = [`Found ${comments.length} relevant historical comments.`];
|
|
1955
|
+
|
|
1956
|
+
if (patterns.commonIssues.length > 0) {
|
|
1957
|
+
summaryParts.push(`Common issues mentioned: ${patterns.commonIssues.join(', ')}.`);
|
|
1958
|
+
}
|
|
1959
|
+
|
|
1960
|
+
if (patterns.reviewPatterns.length > 0) {
|
|
1961
|
+
summaryParts.push(`Review suggestions often involve: ${patterns.reviewPatterns.join(', ')}.`);
|
|
1962
|
+
}
|
|
1963
|
+
|
|
1964
|
+
if (patterns.technicalConcerns.length > 0) {
|
|
1965
|
+
summaryParts.push(`Technical concerns raised: ${patterns.technicalConcerns.join(', ')}.`);
|
|
1966
|
+
}
|
|
1967
|
+
|
|
1968
|
+
// Add recency information
|
|
1969
|
+
const recentComments = comments.filter((c) => {
|
|
1970
|
+
const daysSince = (Date.now() - new Date(c.created_at).getTime()) / (1000 * 60 * 60 * 24);
|
|
1971
|
+
return daysSince <= 30;
|
|
1972
|
+
});
|
|
1973
|
+
|
|
1974
|
+
if (recentComments.length > 0) {
|
|
1975
|
+
summaryParts.push(`${recentComments.length} comments from the last 30 days.`);
|
|
1976
|
+
}
|
|
1977
|
+
|
|
1978
|
+
return summaryParts.join(' ');
|
|
1979
|
+
}
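
// Illustrative only: with the hypothetical patterns object from the example above and
// two comments whose created_at falls inside the last 30 days, the summary produced
// here would read:
// "Found 2 relevant historical comments. Common issues mentioned: bug, fail.
//  Review suggestions often involve: suggest. Technical concerns raised: performance.
//  2 comments from the last 30 days."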
|
|
1980
|
+
|
|
1981
|
+
/**
|
|
1982
|
+
* Format comment for context usage
|
|
1983
|
+
*/
|
|
1984
|
+
function formatCommentForContext(comment) {
|
|
1985
|
+
return {
|
|
1986
|
+
id: comment.id,
|
|
1987
|
+
author: comment.author || comment.author_login, // Handle both field names
|
|
1988
|
+
body: (comment.comment_text || comment.body || '').substring(0, 500), // Handle both field names and truncate
|
|
1989
|
+
createdAt: comment.created_at,
|
|
1990
|
+
commentType: comment.comment_type,
|
|
1991
|
+
filePath: comment.file_path,
|
|
1992
|
+
prNumber: comment.pr_number,
|
|
1993
|
+
prTitle: comment.pr_title,
|
|
1994
|
+
relevanceScore: comment.similarity_score || comment.relevanceScore, // Handle both field names
|
|
1995
|
+
};
|
|
1996
|
+
}
|
|
1997
|
+
|
|
1998
|
+
/**
|
|
1999
|
+
* Perform holistic PR analysis using unified context
|
|
2000
|
+
* @param {Object} options - Analysis options including prFiles and unifiedContext
|
|
2001
|
+
* @returns {Promise<Object>} Holistic analysis results
|
|
2002
|
+
*/
|
|
2003
|
+
async function performHolisticPRAnalysis(options) {
|
|
2004
|
+
try {
|
|
2005
|
+
const { prFiles, unifiedContext, customDocs } = options;
|
|
2006
|
+
|
|
2007
|
+
console.log(chalk.blue(`🔍 Performing holistic analysis of ${prFiles.length} files with unified context...`));
|
|
2008
|
+
|
|
2009
|
+
// Retrieve project architecture summary
|
|
2010
|
+
console.log(chalk.blue('--- Retrieving Project Architecture Context for Holistic PR Review ---'));
|
|
2011
|
+
const projectPath = options.projectPath || process.cwd();
|
|
2012
|
+
const projectSummary = await getProjectSummary(projectPath);
|
|
2013
|
+
|
|
2014
|
+
// Create a synthetic file context for holistic analysis
|
|
2015
|
+
const holisticContext = {
|
|
2016
|
+
file: {
|
|
2017
|
+
path: 'PR_HOLISTIC_REVIEW',
|
|
2018
|
+
name: 'Pull Request',
|
|
2019
|
+
directory: '.',
|
|
2020
|
+
directoryName: '.',
|
|
2021
|
+
language: 'diff',
|
|
2022
|
+
content: prFiles.map((f) => f.diff).join('\n\n'),
|
|
2023
|
+
reviewType: 'PR HOLISTIC REVIEW',
|
|
2024
|
+
isDiffReview: true,
|
|
2025
|
+
prFiles: prFiles, // Add all PR files for context
|
|
2026
|
+
},
|
|
2027
|
+
context: [
|
|
2028
|
+
{
|
|
2029
|
+
title: 'Similar Code Examples',
|
|
2030
|
+
description: 'Code patterns from the project that are similar to the files being reviewed',
|
|
2031
|
+
items: unifiedContext.codeExamples.slice(0, 10),
|
|
2032
|
+
},
|
|
2033
|
+
{
|
|
2034
|
+
title: 'Project Guidelines',
|
|
2035
|
+
description: 'Documentation and guidelines relevant to this code',
|
|
2036
|
+
items: unifiedContext.guidelines.slice(0, 8),
|
|
2037
|
+
},
|
|
2038
|
+
{
|
|
2039
|
+
title: 'Historical Review Comments',
|
|
2040
|
+
description: 'Similar code patterns and issues identified by human reviewers in past PRs',
|
|
2041
|
+
items: unifiedContext.prComments.slice(0, 10),
|
|
2042
|
+
},
|
|
2043
|
+
],
|
|
2044
|
+
customDocs: unifiedContext.customDocChunks || options.relevantCustomDocChunks || customDocs, // Use unified chunks first, then relevant chunks, then full docs
|
|
2045
|
+
projectSummary: projectSummary, // Add project architecture summary
|
|
2046
|
+
metadata: {
|
|
2047
|
+
hasCodeExamples: unifiedContext.codeExamples.length > 0,
|
|
2048
|
+
hasGuidelines: unifiedContext.guidelines.length > 0,
|
|
2049
|
+
hasPRHistory: unifiedContext.prComments.length > 0,
|
|
2050
|
+
hasProjectSummary: !!projectSummary,
|
|
2051
|
+
analysisTimestamp: new Date().toISOString(),
|
|
2052
|
+
reviewType: 'PR HOLISTIC REVIEW',
|
|
2053
|
+
isPRReview: true,
|
|
2054
|
+
isHolisticReview: true,
|
|
2055
|
+
},
|
|
2056
|
+
options: options,
|
|
2057
|
+
};
|
|
2058
|
+
|
|
2059
|
+
// Add verbose debug logging similar to individual file reviews
|
|
2060
|
+
console.log(chalk.magenta('--- Holistic PR Review: Guidelines Sent to LLM ---'));
|
|
2061
|
+
if (unifiedContext.guidelines.length > 0) {
|
|
2062
|
+
unifiedContext.guidelines.slice(0, 10).forEach((g, i) => {
|
|
2063
|
+
console.log(
|
|
2064
|
+
chalk.magenta(
|
|
2065
|
+
` [${i + 1}] Path: ${g.path} ${g.headingText || g.heading_text ? `(Heading: "${g.headingText || g.heading_text}")` : ''}`
|
|
2066
|
+
)
|
|
2067
|
+
);
|
|
2068
|
+
console.log(chalk.gray(` Content: ${g.content.substring(0, 100).replace(/\n/g, ' ')}...`));
|
|
2069
|
+
});
|
|
2070
|
+
} else {
|
|
2071
|
+
console.log(chalk.magenta(' (None)'));
|
|
2072
|
+
}
|
|
2073
|
+
|
|
2074
|
+
console.log(chalk.magenta('--- Holistic PR Review: Code Examples Sent to LLM ---'));
|
|
2075
|
+
if (unifiedContext.codeExamples.length > 0) {
|
|
2076
|
+
unifiedContext.codeExamples.slice(0, 10).forEach((ex, i) => {
|
|
2077
|
+
console.log(chalk.magenta(` [${i + 1}] Path: ${ex.path} (Similarity: ${ex.similarity?.toFixed(3) || 'N/A'})`));
|
|
2078
|
+
console.log(chalk.gray(`      Content: ${ex.content.substring(0, 100).replace(/\n/g, ' ')}...`));
|
|
2079
|
+
});
|
|
2080
|
+
} else {
|
|
2081
|
+
console.log(chalk.magenta(' (None)'));
|
|
2082
|
+
}
|
|
2083
|
+
|
|
2084
|
+
console.log(chalk.magenta('--- Holistic PR Review: Top Historic Comments Sent to LLM ---'));
|
|
2085
|
+
if (unifiedContext.prComments.length > 0) {
|
|
2086
|
+
unifiedContext.prComments.slice(0, 5).forEach((comment, i) => {
|
|
2087
|
+
console.log(
|
|
2088
|
+
chalk.magenta(
|
|
2089
|
+
` [${i + 1}] PR #${comment.prNumber} by ${comment.author} (Relevance: ${(comment.relevanceScore * 100).toFixed(1)}%)`
|
|
2090
|
+
)
|
|
2091
|
+
);
|
|
2092
|
+
console.log(chalk.gray(` File: ${comment.filePath}`));
|
|
2093
|
+
console.log(chalk.gray(` Comment: ${comment.body.substring(0, 100).replace(/\n/g, ' ')}...`));
|
|
2094
|
+
});
|
|
2095
|
+
} else {
|
|
2096
|
+
console.log(chalk.magenta(' (None)'));
|
|
2097
|
+
}
|
|
2098
|
+
|
|
2099
|
+
console.log(chalk.magenta('--- Holistic PR Review: Custom Document Chunks Sent to LLM ---'));
|
|
2100
|
+
if (unifiedContext.customDocChunks && unifiedContext.customDocChunks.length > 0) {
|
|
2101
|
+
unifiedContext.customDocChunks.forEach((chunk, i) => {
|
|
2102
|
+
console.log(chalk.magenta(` [${i + 1}] Document: "${chunk.document_title}" (Chunk ${chunk.chunk_index + 1})`));
|
|
2103
|
+
console.log(chalk.gray(` Similarity: ${chunk.similarity?.toFixed(3) || 'N/A'}`));
|
|
2104
|
+
console.log(chalk.gray(` Content: ${chunk.content.substring(0, 100).replace(/\n/g, ' ')}...`));
|
|
2105
|
+
});
|
|
2106
|
+
} else {
|
|
2107
|
+
console.log(chalk.magenta(' (None)'));
|
|
2108
|
+
}
|
|
2109
|
+
console.log(chalk.magenta('--- Sending Holistic PR Analysis Prompt to LLM ---'));
|
|
2110
|
+
|
|
2111
|
+
// Call the centralized analysis function
|
|
2112
|
+
const parsedResponse = await callLLMForAnalysis(holisticContext, {
|
|
2113
|
+
...options,
|
|
2114
|
+
isHolisticPRReview: true,
|
|
2115
|
+
});
|
|
2116
|
+
|
|
2117
|
+
// Debug logging
|
|
2118
|
+
console.log(chalk.blue(`🐛 Holistic analysis parsed response:`));
|
|
2119
|
+
console.log(chalk.gray(`Summary: ${parsedResponse.summary?.substring(0, 100)}...`));
|
|
2120
|
+
console.log(chalk.gray(`Cross-file issues: ${parsedResponse.crossFileIssues?.length || 0}`));
|
|
2121
|
+
console.log(chalk.gray(`File-specific issues keys: ${Object.keys(parsedResponse.fileSpecificIssues || {}).join(', ')}`));
|
|
2122
|
+
console.log(chalk.gray(`Recommendations: ${parsedResponse.recommendations?.length || 0}`));
|
|
2123
|
+
|
|
2124
|
+
// Filter out low severity issues (formatting/style concerns handled by linters)
|
|
2125
|
+
// Note: The LLM prompt instructs not to generate low severity issues, but this filter
|
|
2126
|
+
// serves as a safety net in case any slip through despite the prompt instructions
|
|
2127
|
+
const filteredResponse = filterLowSeverityIssues(parsedResponse, { verbose: options.verbose });
|
|
2128
|
+
|
|
2129
|
+
return {
|
|
2130
|
+
success: true,
|
|
2131
|
+
filePath: 'PR_HOLISTIC_REVIEW',
|
|
2132
|
+
language: 'diff',
|
|
2133
|
+
results: {
|
|
2134
|
+
summary: filteredResponse.summary || 'Holistic PR review completed',
|
|
2135
|
+
crossFileIssues: filteredResponse.crossFileIssues || [],
|
|
2136
|
+
fileSpecificIssues: filteredResponse.fileSpecificIssues || {},
|
|
2137
|
+
recommendations: filteredResponse.recommendations || [],
|
|
2138
|
+
},
|
|
2139
|
+
context: {
|
|
2140
|
+
codeExamples: unifiedContext.codeExamples.length,
|
|
2141
|
+
guidelines: unifiedContext.guidelines.length,
|
|
2142
|
+
prComments: unifiedContext.prComments.length,
|
|
2143
|
+
},
|
|
2144
|
+
metadata: {
|
|
2145
|
+
analysisTimestamp: new Date().toISOString(),
|
|
2146
|
+
featuresUsed: {
|
|
2147
|
+
codeExamples: unifiedContext.codeExamples.length > 0,
|
|
2148
|
+
guidelines: unifiedContext.guidelines.length > 0,
|
|
2149
|
+
prHistory: unifiedContext.prComments.length > 0,
|
|
2150
|
+
},
|
|
2151
|
+
},
|
|
2152
|
+
};
|
|
2153
|
+
} catch (error) {
|
|
2154
|
+
console.error(chalk.red(`Error in holistic PR analysis: ${error.message}`));
|
|
2155
|
+
return {
|
|
2156
|
+
success: false,
|
|
2157
|
+
error: error.message,
|
|
2158
|
+
filePath: 'PR_HOLISTIC_REVIEW',
|
|
2159
|
+
};
|
|
2160
|
+
}
|
|
2161
|
+
}
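
// Usage sketch (hypothetical data and helper name): performHolisticPRAnalysis expects
// the PR's files plus a unifiedContext whose arrays were gathered elsewhere; only the
// fields read above are shown.
async function runExampleHolisticReview() {
  const result = await performHolisticPRAnalysis({
    projectPath: process.cwd(),
    prFiles: [
      {
        path: 'src/example.js', // hypothetical file
        language: 'javascript',
        isTest: false,
        summary: 'Adds an example helper',
        diff: '+export const example = () => 1;',
        fullContent: 'export const example = () => 1;\n',
      },
    ],
    unifiedContext: { codeExamples: [], guidelines: [], prComments: [], customDocChunks: [] },
    customDocs: [],
  });
  if (result.success) {
    console.log(result.results.summary);
  }
}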
|
|
2162
|
+
|
|
2163
|
+
/**
|
|
2164
|
+
* NEW: Gathers all context for a single file.
|
|
2165
|
+
* This encapsulates the logic for finding docs, code, and PR comments.
|
|
2166
|
+
* @param {string} filePath - Path to the file to get context for.
|
|
2167
|
+
* @param {string} content - The content of the file (or diff).
|
|
2168
|
+
* @param {Object} options - Analysis options.
|
|
2169
|
+
* @returns {Promise<Object>} An object containing the gathered context.
|
|
2170
|
+
*/
|
|
2171
|
+
async function getContextForFile(filePath, content, options = {}) {
|
|
2172
|
+
const RELEVANT_CHUNK_THRESHOLD = 0.1;
|
|
2173
|
+
const W_H1_SIM = 0.2;
|
|
2174
|
+
const W_DOC_CONTEXT_MATCH = 0.6;
|
|
2175
|
+
const GENERIC_DOC_PENALTY_FACTOR = 0.7;
|
|
2176
|
+
const GUIDELINE_CANDIDATE_LIMIT = 100;
|
|
2177
|
+
const CODE_EXAMPLE_LIMIT = 40;
|
|
2178
|
+
const MAX_FINAL_EXAMPLES = 8;
|
|
2179
|
+
|
|
2180
|
+
// --- Stage 0: Initialize Tables (ONE-TIME SETUP) ---
|
|
2181
|
+
// Note: This may be called concurrently. `initializeTables` should be idempotent.
|
|
2182
|
+
try {
|
|
2183
|
+
await embeddingsSystem.initialize();
|
|
2184
|
+
} catch (initError) {
|
|
2185
|
+
console.warn(chalk.yellow(`Database initialization warning: ${initError.message}`));
|
|
2186
|
+
}
|
|
2187
|
+
|
|
2188
|
+
const projectPath = options.projectPath || (options.directory ? path.resolve(options.directory) : null) || process.cwd();
|
|
2189
|
+
const language = detectLanguageFromExtension(path.extname(filePath).toLowerCase());
|
|
2190
|
+
const fileTypeInfo = detectFileType(filePath, content);
|
|
2191
|
+
const isTestFile = fileTypeInfo.isTest;
|
|
2192
|
+
|
|
2193
|
+
const reviewedSnippetContext = inferContextFromCodeContent(content, language);
|
|
2194
|
+
debug('[getContextForFile] Reviewed Snippet Context:', reviewedSnippetContext);
|
|
2195
|
+
|
|
2196
|
+
let analyzedFileEmbedding = null;
|
|
2197
|
+
let fileContentQueryEmbedding = null;
|
|
2198
|
+
let guidelineQueryEmbedding = null;
|
|
2199
|
+
|
|
2200
|
+
if (content.trim().length > 0) {
|
|
2201
|
+
analyzedFileEmbedding = await embeddingsSystem.calculateEmbedding(content.substring(0, MAX_EMBEDDING_CONTENT_LENGTH));
|
|
2202
|
+
const queryContent = isTestFile ? `${content}\n// Looking for similar test files and testing patterns` : content;
|
|
2203
|
+
fileContentQueryEmbedding = await embeddingsSystem.calculateQueryEmbedding(queryContent);
|
|
2204
|
+
}
|
|
2205
|
+
|
|
2206
|
+
const guidelineQuery = isTestFile
|
|
2207
|
+
? createTestGuidelineQueryForLLMRetrieval(content, reviewedSnippetContext, language)
|
|
2208
|
+
: createGuidelineQueryForLLMRetrieval(content, reviewedSnippetContext, language);
|
|
2209
|
+
|
|
2210
|
+
if (guidelineQuery && guidelineQuery.trim().length > 0) {
|
|
2211
|
+
guidelineQueryEmbedding = await embeddingsSystem.calculateQueryEmbedding(guidelineQuery);
|
|
2212
|
+
}
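// The query embeddings computed above are passed down as `precomputedQueryEmbedding`
// to the retrieval helpers below, so each embedding is calculated once per file and
// then reused instead of being recomputed for every context source.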
|
|
2213
|
+
|
|
2214
|
+
console.log(chalk.blue('🔍 Starting parallel context retrieval...'));
|
|
2215
|
+
// Helper function to process custom documents in parallel (with caching)
|
|
2216
|
+
const processCustomDocuments = async () => {
|
|
2217
|
+
// Check if preprocessed chunks are available (from PR-level processing)
|
|
2218
|
+
if (options.preprocessedCustomDocChunks && options.preprocessedCustomDocChunks.length > 0) {
|
|
2219
|
+
console.log(chalk.blue(`📄 Using preprocessed custom document chunks (${options.preprocessedCustomDocChunks.length} available)`));
|
|
2220
|
+
|
|
2221
|
+
// Use the guideline query for finding relevant custom document chunks
|
|
2222
|
+
const relevantChunks = await embeddingsSystem.findRelevantCustomDocChunks(guidelineQuery, options.preprocessedCustomDocChunks, {
|
|
2223
|
+
limit: 5,
|
|
2224
|
+
similarityThreshold: 0.3,
|
|
2225
|
+
queryContextForReranking: reviewedSnippetContext,
|
|
2226
|
+
useReranking: true,
|
|
2227
|
+
precomputedQueryEmbedding: guidelineQueryEmbedding,
|
|
2228
|
+
queryFilePath: filePath,
|
|
2229
|
+
});
|
|
2230
|
+
|
|
2231
|
+
console.log(chalk.green(`📄 Found ${relevantChunks.length} relevant custom document chunks`));
|
|
2232
|
+
|
|
2233
|
+
// Log which chunks made the cut
|
|
2234
|
+
if (relevantChunks.length > 0) {
|
|
2235
|
+
console.log(chalk.cyan('📋 Custom Document Chunks Selected:'));
|
|
2236
|
+
relevantChunks.forEach((chunk, i) => {
|
|
2237
|
+
console.log(chalk.cyan(` [${i + 1}] "${chunk.document_title}" (Chunk ${chunk.chunk_index + 1})`));
|
|
2238
|
+
console.log(chalk.gray(` Similarity: ${chunk.similarity?.toFixed(3) || 'N/A'}`));
|
|
2239
|
+
console.log(chalk.gray(` Content: ${chunk.content.substring(0, 80).replace(/\n/g, ' ')}...`));
|
|
2240
|
+
});
|
|
2241
|
+
}
|
|
2242
|
+
|
|
2243
|
+
return relevantChunks;
|
|
2244
|
+
}
|
|
2245
|
+
|
|
2246
|
+
// Fallback to original processing if no preprocessed chunks available
|
|
2247
|
+
if (!options.customDocs || options.customDocs.length === 0) {
|
|
2248
|
+
return [];
|
|
2249
|
+
}
|
|
2250
|
+
|
|
2251
|
+
try {
|
|
2252
|
+
console.log(chalk.blue('📄 Processing custom documents for context...'));
|
|
2253
|
+
|
|
2254
|
+
// Check if custom documents are already processed for this project
|
|
2255
|
+
let processedChunks = await checkExistingCustomDocumentChunks(projectPath);
|
|
2256
|
+
|
|
2257
|
+
if (!processedChunks || processedChunks.length === 0) {
|
|
2258
|
+
console.log(chalk.cyan('📄 Custom documents not yet processed for this project, processing now...'));
|
|
2259
|
+
// Process custom documents into chunks (only if not already processed)
|
|
2260
|
+
processedChunks = await embeddingsSystem.processCustomDocumentsInMemory(options.customDocs, projectPath);
|
|
2261
|
+
} else {
|
|
2262
|
+
console.log(chalk.green(`📄 Reusing ${processedChunks.length} already processed custom document chunks`));
|
|
2263
|
+
}
|
|
2264
|
+
|
|
2265
|
+
if (processedChunks.length > 0) {
|
|
2266
|
+
// Use the guideline query for finding relevant custom document chunks
|
|
2267
|
+
const relevantChunks = await embeddingsSystem.findRelevantCustomDocChunks(guidelineQuery, processedChunks, {
|
|
2268
|
+
limit: 5,
|
|
2269
|
+
similarityThreshold: 0.3,
|
|
2270
|
+
queryContextForReranking: reviewedSnippetContext,
|
|
2271
|
+
useReranking: true,
|
|
2272
|
+
precomputedQueryEmbedding: guidelineQueryEmbedding,
|
|
2273
|
+
queryFilePath: filePath,
|
|
2274
|
+
});
|
|
2275
|
+
|
|
2276
|
+
console.log(chalk.green(`📄 Found ${relevantChunks.length} relevant custom document chunks`));
|
|
2277
|
+
|
|
2278
|
+
// Log which chunks made the cut
|
|
2279
|
+
if (relevantChunks.length > 0) {
|
|
2280
|
+
console.log(chalk.cyan('📋 Custom Document Chunks Selected:'));
|
|
2281
|
+
relevantChunks.forEach((chunk, i) => {
|
|
2282
|
+
console.log(chalk.cyan(` [${i + 1}] "${chunk.document_title}" (Chunk ${chunk.chunk_index + 1})`));
|
|
2283
|
+
console.log(chalk.gray(` Similarity: ${chunk.similarity?.toFixed(3) || 'N/A'}`));
|
|
2284
|
+
console.log(chalk.gray(` Content: ${chunk.content.substring(0, 80).replace(/\n/g, ' ')}...`));
|
|
2285
|
+
});
|
|
2286
|
+
}
|
|
2287
|
+
|
|
2288
|
+
return relevantChunks;
|
|
2289
|
+
}
|
|
2290
|
+
} catch (error) {
|
|
2291
|
+
console.error(chalk.red(`Error processing custom documents: ${error.message}`));
|
|
2292
|
+
}
|
|
2293
|
+
|
|
2294
|
+
return [];
|
|
2295
|
+
};
|
|
2296
|
+
|
|
2297
|
+
// Helper function to check if custom documents are already processed
|
|
2298
|
+
const checkExistingCustomDocumentChunks = async (projectPath) => {
|
|
2299
|
+
try {
|
|
2300
|
+
// Use the statically imported function
|
|
2301
|
+
return await embeddingsSystem.getExistingCustomDocumentChunks(projectPath);
|
|
2302
|
+
} catch {
|
|
2303
|
+
console.log(chalk.gray('No existing custom document chunks found, will process from scratch'));
|
|
2304
|
+
return [];
|
|
2305
|
+
}
|
|
2306
|
+
};
|
|
2307
|
+
|
|
2308
|
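  // Retrieve the four context sources in parallel: PR comment history, documentation
  // guidelines, similar code examples, and custom document chunks. If any of them rejects,
  // the shared .catch below falls back to four empty arrays so the review can still
  // proceed without retrieved context.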
  const [prContextResult, guidelineCandidates, codeExampleCandidates, relevantCustomDocChunks] = await Promise.all([
    getPRCommentContext(filePath, {
      ...options,
      projectPath,
      precomputedQueryEmbedding: fileContentQueryEmbedding,
      maxComments: MAX_PR_COMMENTS_FOR_CONTEXT,
      similarityThreshold: options.prSimilarityThreshold || 0.3,
      timeout: options.prTimeout || 300000,
      repository: options.repository || null,
    }),
    embeddingsSystem.findRelevantDocs(guidelineQuery, {
      ...options,
      projectPath,
      precomputedQueryEmbedding: guidelineQueryEmbedding,
      limit: GUIDELINE_CANDIDATE_LIMIT,
      similarityThreshold: 0.05,
      useReranking: true,
      queryContextForReranking: reviewedSnippetContext,
    }),
    embeddingsSystem.findSimilarCode(isTestFile ? `${content}\\n// Looking for similar test files and testing patterns` : content, {
      ...options,
      projectPath,
      isTestFile,
      precomputedQueryEmbedding: fileContentQueryEmbedding,
      limit: CODE_EXAMPLE_LIMIT,
      similarityThreshold: 0.3,
      queryFilePath: filePath,
      includeProjectStructure: false,
    }),
    processCustomDocuments(), // Add custom document processing as 4th parallel operation
  ]).catch((error) => {
    console.warn(chalk.yellow(`Parallel context retrieval failed: ${error.message}`));
    return [[], [], [], []];
  });

  const prCommentContext = prContextResult?.comments || [];
  const prContextAvailable = prCommentContext.length > 0;
  console.log(chalk.green(`✅ Found ${prCommentContext.length} relevant PR comments`));

  const documentChunks = Array.isArray(guidelineCandidates) ? guidelineCandidates.filter((c) => c.type === 'documentation-chunk') : [];
  const chunksByDocument = new Map();
  for (const chunk of documentChunks) {
    if (!chunksByDocument.has(chunk.path)) {
      chunksByDocument.set(chunk.path, []);
    }
    chunksByDocument.get(chunk.path).push(chunk);
  }

  const scoredDocuments = [];

  for (const [docPath, docChunks] of chunksByDocument.entries()) {
    const docH1 = docChunks[0]?.document_title || path.basename(docPath, path.extname(docPath));

    // FAST-PATH OPTIMIZATION: Use shared utility for generic documents
    let candidateDocFullContext;
    if (isGenericDocument(docPath, docH1)) {
      candidateDocFullContext = getGenericDocumentContext(docPath, docH1);
      debug(`[FAST-PATH] Using pre-computed context for generic document in RAG: ${docPath}`);
    } else {
      candidateDocFullContext = await inferContextFromDocumentContent(docPath, docH1, docChunks, language);
    }
    const relevantChunksForDoc = docChunks.filter((c) => c.similarity >= RELEVANT_CHUNK_THRESHOLD);
    if (relevantChunksForDoc.length === 0) continue;

    const maxChunkScoreInDoc = Math.max(...relevantChunksForDoc.map((c) => c.similarity));
    const avgChunkScoreInDoc = relevantChunksForDoc.reduce((sum, c) => sum + c.similarity, 0) / relevantChunksForDoc.length;
    const numRelevantChunks = relevantChunksForDoc.length;
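    // Blend of the best chunk (50%), the average chunk (30%), and the chunk count capped
    // at five (worth up to 0.2), so a document's semantic quality score tops out around 1.0.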
    const semanticQualityScore = maxChunkScoreInDoc * 0.5 + avgChunkScoreInDoc * 0.3 + Math.min(numRelevantChunks, 5) * 0.04;

    let docLevelContextMatchScore = 0;
    if (
      reviewedSnippetContext.area !== 'Unknown' &&
      candidateDocFullContext.area !== 'Unknown' &&
      candidateDocFullContext.area !== 'General'
    ) {
      if (reviewedSnippetContext.area === candidateDocFullContext.area) {
        docLevelContextMatchScore += 0.8;
        for (const tech of reviewedSnippetContext.dominantTech) {
          if (candidateDocFullContext.dominantTech.map((t) => t.toLowerCase()).includes(tech.toLowerCase())) {
            docLevelContextMatchScore += 0.2;
            break;
          }
        }
      } else if (reviewedSnippetContext.area !== 'GeneralJS_TS') {
        docLevelContextMatchScore -= 0.2;
      }
    }

    let docH1RelevanceToReviewedFile = 0;
    if (docH1 && analyzedFileEmbedding) {
      const docH1Embedding = await embeddingsSystem.calculateEmbedding(docH1);
      if (docH1Embedding) {
        docH1RelevanceToReviewedFile = calculateCosineSimilarity(analyzedFileEmbedding, docH1Embedding);
      }
    }

    const isGenericByName = isGenericDocument(docPath, docH1);
    let genericDocPenaltyFactor = 1.0;
    if (candidateDocFullContext.isGeneralPurposeReadmeStyle || isGenericByName) {
      if (reviewedSnippetContext.area !== 'DevOps' && (docLevelContextMatchScore < 0.8 || isGenericByName)) {
        genericDocPenaltyFactor = GENERIC_DOC_PENALTY_FACTOR;
      }
    }

    let finalDocScore =
      semanticQualityScore * 0.2 + docLevelContextMatchScore * W_DOC_CONTEXT_MATCH + docH1RelevanceToReviewedFile * W_H1_SIM;
    finalDocScore *= genericDocPenaltyFactor;
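    // Illustrative arithmetic (hypothetical numbers): a doc with semanticQualityScore 0.5,
    // an exact area plus tech match (1.0), and an H1 similarity of 0.4 scores
    // 0.5 * 0.2 + 1.0 * 0.6 + 0.4 * 0.2 = 0.78; if it is README-style, the 0.7 penalty
    // brings it to 0.546, which still clears the 0.3 cutoff applied further down.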

    scoredDocuments.push({
      path: docPath,
      score: finalDocScore,
      chunks: docChunks.sort((a, b) => b.similarity - a.similarity),
      debug: {
        area: candidateDocFullContext.area,
        tech: candidateDocFullContext.dominantTech.join(', '),
        isGenericStyle: candidateDocFullContext.isGeneralPurposeReadmeStyle || isGenericByName,
        semanticQualityScore: semanticQualityScore.toFixed(4),
        docLevelContextMatchScore: docLevelContextMatchScore.toFixed(4),
        docH1RelevanceToReviewedFile: docH1RelevanceToReviewedFile.toFixed(4),
        genericDocPenaltyFactor: genericDocPenaltyFactor.toFixed(4),
        finalScore: finalDocScore.toFixed(4),
      },
    });
  }
  scoredDocuments.sort((a, b) => b.score - a.score);

  debug('[getContextForFile] Top Scored Documents:');
  scoredDocuments.slice(0, 7).forEach((d) => {
    debug(
      ` Path: ${d.path}, Score: ${d.score.toFixed(4)}, Area: ${d.debug.area}, Tech: ${d.debug.tech}, Generic: ${d.debug.isGenericStyle}`
    );
  });

  const finalGuidelineSnippets = [];
  const relevantDocs = scoredDocuments.filter((doc) => {
    if (doc.score < 0.3) {
      debug(`[getContextForFile] Excluding doc ${doc.path} - score too low: ${doc.score.toFixed(4)}`);
      return false;
    }
    if (
      reviewedSnippetContext.area !== 'Unknown' &&
      doc.debug.area !== 'Unknown' &&
      doc.debug.area !== 'General' &&
      reviewedSnippetContext.area !== doc.debug.area
    ) {
      const hasTechMatch = reviewedSnippetContext.dominantTech.some((tech) => doc.debug.tech.toLowerCase().includes(tech.toLowerCase()));
      if (!hasTechMatch) {
        debug(
          `[getContextForFile] Excluding doc ${doc.path} - area mismatch without tech match: ${doc.debug.area} vs ${reviewedSnippetContext.area}`
        );
        return false;
      }
    }
    return true;
  });

  for (const doc of relevantDocs.slice(0, 4)) {
    if (doc.chunks && doc.chunks.length > 0) {
      finalGuidelineSnippets.push(doc.chunks[0]);
    }
  }

  const uniqueCandidates = [];
  const seenPaths = new Set();
  const normalizedReviewPath = path.resolve(projectPath, filePath);

  for (const candidate of codeExampleCandidates || []) {
    const normalizedCandidatePath = path.resolve(projectPath, candidate.path);
    if (normalizedCandidatePath !== normalizedReviewPath && !candidate.isDocumentation && !seenPaths.has(candidate.path)) {
      uniqueCandidates.push(candidate);
      seenPaths.add(candidate.path);
    }
  }
  uniqueCandidates.sort((a, b) => b.similarity - a.similarity);
  const finalCodeExamples = uniqueCandidates.slice(0, MAX_FINAL_EXAMPLES);

  return {
    language,
    isTestFile,
    finalCodeExamples,
    finalGuidelineSnippets,
    prCommentContext,
    prContextAvailable,
    relevantCustomDocChunks, // Add relevant custom document chunks
  };
}

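/**
 * Gathers context for every changed file in a PR, then merges the results into a single
 * deduplicated pool of code examples, guidelines, PR comments, and custom document chunks.
 * Custom documents are chunked once up front and reused for each file.
 * @param {Array<Object>} prFiles - Changed files (each with filePath and diffContent/content).
 * @param {Object} options - Analysis options (projectPath, customDocs, maxExamples, ...).
 * @returns {Promise<Object>} Unified context: { codeExamples, guidelines, prComments, customDocChunks }.
 */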
async function gatherUnifiedContextForPR(prFiles, options = {}) {
  const allProcessedContext = {
    codeExamples: new Map(),
    guidelines: new Map(),
    prComments: new Map(),
    customDocChunks: new Map(),
  };

  // Process custom documents into chunks once at the start for the entire PR
  let globalCustomDocChunks = [];
  if (options.customDocs && options.customDocs.length > 0) {
    const projectPath = options.projectPath || process.cwd();
    console.log(chalk.blue('📄 Processing custom documents once for entire PR...'));

    try {
      // Check if custom documents are already processed for this project
      let processedChunks = await embeddingsSystem.getExistingCustomDocumentChunks(projectPath);

      if (!processedChunks || processedChunks.length === 0) {
        console.log(chalk.cyan('📄 Custom documents not yet processed for this project, processing now...'));
        processedChunks = await embeddingsSystem.processCustomDocumentsInMemory(options.customDocs, projectPath);
      } else {
        console.log(chalk.green(`📄 Reusing ${processedChunks.length} already processed custom document chunks`));
      }

      globalCustomDocChunks = processedChunks;
      console.log(chalk.green(`📄 Custom documents processed: ${globalCustomDocChunks.length} chunks available for PR analysis`));
    } catch (error) {
      console.error(chalk.red(`Error processing custom documents for PR: ${error.message}`));
    }
  }

  const contextPromises = prFiles.map(async (file) => {
    try {
      const filePath = file.filePath;
      const content = file.diffContent || file.content;
      // Pass the pre-processed chunks to avoid reprocessing, but still allow file-specific similarity search
      const optionsWithPreprocessedChunks = {
        ...options,
        customDocs: [], // Remove original custom docs to avoid reprocessing
        preprocessedCustomDocChunks: globalCustomDocChunks, // Pass pre-processed chunks
      };
      const context = await getContextForFile(filePath, content, optionsWithPreprocessedChunks);
      return {
        ...context,
        filePath,
      };
    } catch (error) {
      console.error(chalk.red(`Error gathering context for file ${file.filePath}: ${error.message}`));
      return null; // Return null on error for this file
    }
  });

  const allContexts = (await Promise.all(contextPromises)).filter(Boolean); // Filter out nulls

  // Aggregate and deduplicate results
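  // (when the same example, guideline, comment, or chunk is retrieved for several files,
  // keep only the copy with the highest similarity / relevance score)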
  for (const context of allContexts) {
    (context.finalCodeExamples || []).forEach((example) => {
      const key = example.path;
      if (
        key &&
        (!allProcessedContext.codeExamples.has(key) || example.similarity > allProcessedContext.codeExamples.get(key).similarity)
      ) {
        allProcessedContext.codeExamples.set(key, example);
      }
    });

    (context.finalGuidelineSnippets || []).forEach((guideline) => {
      const key = `${guideline.path}-${guideline.heading_text || ''}`;
      if (!allProcessedContext.guidelines.has(key) || guideline.similarity > allProcessedContext.guidelines.get(key).similarity) {
        allProcessedContext.guidelines.set(key, guideline);
      }
    });

    (context.prCommentContext || []).forEach((comment) => {
      const key = comment.id;
      if (
        key &&
        (!allProcessedContext.prComments.has(key) || comment.relevanceScore > allProcessedContext.prComments.get(key).relevanceScore)
      ) {
        allProcessedContext.prComments.set(key, comment);
      }
    });

    (context.relevantCustomDocChunks || []).forEach((chunk) => {
      const key = chunk.id;
      if (
        key &&
        (!allProcessedContext.customDocChunks.has(key) || chunk.similarity > allProcessedContext.customDocChunks.get(key).similarity)
      ) {
        allProcessedContext.customDocChunks.set(key, chunk);
      }
    });
  }

  // Convert Maps to sorted arrays
  const deduplicatedCodeExamples = Array.from(allProcessedContext.codeExamples.values())
    .sort((a, b) => b.similarity - a.similarity)
    .slice(0, options.maxExamples || 40);

  const deduplicatedGuidelines = Array.from(allProcessedContext.guidelines.values())
    .sort((a, b) => b.similarity - a.similarity)
    .slice(0, 100);

  const deduplicatedPRComments = Array.from(allProcessedContext.prComments.values())
    .sort((a, b) => b.relevanceScore - a.relevanceScore)
    .slice(0, 40); // Keep a larger pool of 40 candidates for the final prompt selection

  const deduplicatedCustomDocChunks = Array.from(allProcessedContext.customDocChunks.values())
    .sort((a, b) => b.similarity - a.similarity)
    .slice(0, 10); // Keep top 10 custom document chunks

  return {
    codeExamples: deduplicatedCodeExamples,
    guidelines: deduplicatedGuidelines,
    prComments: deduplicatedPRComments,
    customDocChunks: deduplicatedCustomDocChunks,
  };
}

/**
 * Filter out low severity issues from analysis results
 * Low severity issues are typically formatting/style concerns better handled by linters
 *
 * @param {Object} analysisResults - Analysis results from LLM
 * @param {Object} options - Filtering options
 * @returns {Object} Filtered analysis results without low severity issues
 */
function filterLowSeverityIssues(analysisResults, options = {}) {
  const { verbose = false } = options;

  if (!analysisResults) {
    return analysisResults;
  }

  let filteredCount = 0;

  // Filter single-file issues array
  if (analysisResults.issues && Array.isArray(analysisResults.issues)) {
    const originalCount = analysisResults.issues.length;
    analysisResults.issues = analysisResults.issues.filter((issue) => {
      const severity = (issue.severity || '').toLowerCase();
      if (severity === 'low') {
        if (verbose) {
          console.log(chalk.yellow(` Filtering low severity issue: "${(issue.description || '').substring(0, 50)}..."`));
        }
        return false;
      }
      return true;
    });
    filteredCount += originalCount - analysisResults.issues.length;
  }

  // Filter cross-file issues (for holistic PR review)
  if (analysisResults.crossFileIssues && Array.isArray(analysisResults.crossFileIssues)) {
    const originalCount = analysisResults.crossFileIssues.length;
    analysisResults.crossFileIssues = analysisResults.crossFileIssues.filter((issue) => {
      const severity = (issue.severity || '').toLowerCase();
      if (severity === 'low') {
        if (verbose) {
          console.log(
            chalk.yellow(` Filtering low severity cross-file issue: "${(issue.message || issue.description || '').substring(0, 50)}..."`)
          );
        }
        return false;
      }
      return true;
    });
    filteredCount += originalCount - analysisResults.crossFileIssues.length;
  }

  // Filter file-specific issues (for holistic PR review)
  if (analysisResults.fileSpecificIssues && typeof analysisResults.fileSpecificIssues === 'object') {
    for (const filePath of Object.keys(analysisResults.fileSpecificIssues)) {
      const issues = analysisResults.fileSpecificIssues[filePath];
      if (Array.isArray(issues)) {
        const originalCount = issues.length;
        analysisResults.fileSpecificIssues[filePath] = issues.filter((issue) => {
          const severity = (issue.severity || '').toLowerCase();
          if (severity === 'low') {
            if (verbose) {
              console.log(
                chalk.yellow(` Filtering low severity issue in ${filePath}: "${(issue.description || '').substring(0, 50)}..."`)
              );
            }
            return false;
          }
          return true;
        });
        filteredCount += originalCount - analysisResults.fileSpecificIssues[filePath].length;
      }
    }
  }

  if (filteredCount > 0) {
    console.log(chalk.cyan(`🔇 Filtered ${filteredCount} low severity issue(s) (formatting/style concerns handled by linters)`));
  }

  return analysisResults;
}

/**
 * Filter analysis results based on feedback data using semantic similarity
 *
 * @param {Object} analysisResults - Raw analysis results from LLM
 * @param {Object} feedbackData - Loaded feedback data
 * @param {Object} options - Filtering options
 * @returns {Promise<Object>} Filtered analysis results
 */
async function filterAnalysisResults(analysisResults, feedbackData, options = {}) {
  const { similarityThreshold = 0.7, verbose = false } = options;

  if (!analysisResults || !analysisResults.issues || !Array.isArray(analysisResults.issues)) {
    return analysisResults;
  }

  const originalCount = analysisResults.issues.length;

  // Ensure semantic similarity is initialized for better matching
  await ensureSemanticSimilarityInitialized();
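  // If the embedding model could not be initialized, the similarity check is expected to
  // fall back to word-based matching (see the verbose log below), so filtering still runs.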

  // Log whether semantic similarity is available
  if (verbose) {
    const usingSemanticSimilarity = isSemanticSimilarityAvailable();
    console.log(
      chalk.cyan(`🔍 Filtering issues using ${usingSemanticSimilarity ? 'semantic + word-based similarity' : 'word-based similarity only'}`)
    );
  }

  // Filter issues based on feedback (now async due to semantic similarity)
  const filterResults = await Promise.all(
    analysisResults.issues.map(async (issue, index) => {
      const issueDescription = issue.description || issue.summary || '';
      const shouldSkip = await shouldSkipSimilarIssue(issueDescription, feedbackData, {
        similarityThreshold,
        verbose,
      });

      if (shouldSkip && verbose) {
        console.log(chalk.yellow(` Filtered issue ${index + 1}: "${issueDescription.substring(0, 50)}..."`));
      }

      return { issue, shouldSkip };
    })
  );

  const filteredIssues = filterResults.filter((result) => !result.shouldSkip).map((result) => result.issue);

  const filteredCount = originalCount - filteredIssues.length;

  if (verbose && filteredCount > 0) {
    console.log(chalk.green(`✅ Filtered ${filteredCount} dismissed issues, ${filteredIssues.length} remaining`));
  }

  return {
    ...analysisResults,
    issues: filteredIssues,
    metadata: {
      ...analysisResults.metadata,
      feedbackFiltering: {
        originalIssueCount: originalCount,
        filteredIssueCount: filteredCount,
        finalIssueCount: filteredIssues.length,
        usedSemanticSimilarity: isSemanticSimilarityAvailable(),
      },
    },
  };
}

export { runAnalysis, gatherUnifiedContextForPR };
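
// Illustrative usage (a sketch, not part of the module): a caller such as the PR review
// entry point might combine the exports roughly like this, with hypothetical inputs:
//
//   const unifiedContext = await gatherUnifiedContextForPR(prFiles, {
//     projectPath: '/path/to/repo',
//     customDocs: ['docs/style-guide.md'],
//     maxExamples: 40,
//   });
//   // unifiedContext.codeExamples, .guidelines, .prComments, and .customDocChunks
//   // then feed the holistic review prompt.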