codecritique 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +1145 -0
- package/package.json +98 -0
- package/src/content-retrieval.js +747 -0
- package/src/custom-documents.js +597 -0
- package/src/embeddings/cache-manager.js +364 -0
- package/src/embeddings/constants.js +40 -0
- package/src/embeddings/database.js +921 -0
- package/src/embeddings/errors.js +208 -0
- package/src/embeddings/factory.js +447 -0
- package/src/embeddings/file-processor.js +851 -0
- package/src/embeddings/model-manager.js +337 -0
- package/src/embeddings/similarity-calculator.js +97 -0
- package/src/embeddings/types.js +113 -0
- package/src/feedback-loader.js +384 -0
- package/src/index.js +1418 -0
- package/src/llm.js +123 -0
- package/src/pr-history/analyzer.js +579 -0
- package/src/pr-history/bot-detector.js +123 -0
- package/src/pr-history/cli-utils.js +204 -0
- package/src/pr-history/comment-processor.js +549 -0
- package/src/pr-history/database.js +819 -0
- package/src/pr-history/github-client.js +629 -0
- package/src/project-analyzer.js +955 -0
- package/src/rag-analyzer.js +2764 -0
- package/src/rag-review.js +566 -0
- package/src/technology-keywords.json +753 -0
- package/src/utils/command.js +48 -0
- package/src/utils/constants.js +263 -0
- package/src/utils/context-inference.js +364 -0
- package/src/utils/document-detection.js +105 -0
- package/src/utils/file-validation.js +271 -0
- package/src/utils/git.js +232 -0
- package/src/utils/language-detection.js +170 -0
- package/src/utils/logging.js +24 -0
- package/src/utils/markdown.js +132 -0
- package/src/utils/mobilebert-tokenizer.js +141 -0
- package/src/utils/pr-chunking.js +276 -0
- package/src/utils/string-utils.js +28 -0
- package/src/zero-shot-classifier-open.js +392 -0
package/src/llm.js
ADDED
@@ -0,0 +1,123 @@
/**
 * LLM Integration Module
 *
 * This module provides functionality to interact with Large Language Models (LLMs)
 * for code analysis and review. Enhanced to leverage project-specific patterns and
 * feedback from PR reviews for more context-aware recommendations.
 * Currently supports Anthropic's Claude Sonnet 4.
 */

import { Anthropic } from '@anthropic-ai/sdk';
import chalk from 'chalk';
import dotenv from 'dotenv';

// Load env variables if present; do not enforce key at import time
dotenv.config();

let anthropic = null;
/**
 * Get the Anthropic client
 * @returns {Anthropic} The Anthropic client
 */
function getAnthropicClient() {
  if (anthropic) return anthropic;
  const apiKey = process.env.ANTHROPIC_API_KEY;
  if (!apiKey) {
    throw new Error('ANTHROPIC_API_KEY is required for analysis. Set it in env or .env before running analyze.');
  }
  anthropic = new Anthropic({ apiKey });
  return anthropic;
}

// Default model
const DEFAULT_MODEL = 'claude-sonnet-4-5';

// Maximum tokens for response
const MAX_TOKENS = 4096;

/**
 * Send a prompt to Claude and get a structured JSON response using tool calling
 *
 * @param {string} prompt - The prompt to send to Claude
 * @param {Object} options - Options for the request
 * @param {Object} options.jsonSchema - JSON schema for structured output
 * @returns {Promise<Object>} The response from Claude with structured data
 */
async function sendPromptToClaude(prompt, options = {}) {
  const { model = DEFAULT_MODEL, maxTokens = MAX_TOKENS, temperature = 0.7, system = '', jsonSchema = null } = options;

  try {
    console.log(chalk.cyan('Sending prompt to Claude...'));

    const client = getAnthropicClient();

    // Use structured output with tool calling if schema is provided
    if (jsonSchema) {
      const tools = [
        {
          name: 'return_json',
          description: 'Return the final answer strictly as JSON matching the schema.',
          input_schema: jsonSchema,
        },
      ];

      const response = await client.messages.create({
        model,
        max_tokens: maxTokens,
        temperature,
        tools,
        tool_choice: { type: 'tool', name: 'return_json' },
        system:
          system ||
          'You are an expert code reviewer with deep knowledge of software engineering principles, design patterns, and best practices.',
        messages: [
          {
            role: 'user',
            content: prompt,
          },
        ],
      });

      // Find the tool_use block and extract the structured data
      const toolUse = response.content.find((block) => block.type === 'tool_use' && block.name === 'return_json');

      if (!toolUse) {
        throw new Error('No structured output received from Claude');
      }

      return {
        content: JSON.stringify(toolUse.input, null, 2), // For backward compatibility
        model: response.model,
        usage: response.usage,
        json: toolUse.input, // The parsed JavaScript object
      };
    } else {
      // Fallback to regular text response
      const response = await client.messages.create({
        model,
        max_tokens: maxTokens,
        temperature,
        system:
          system ||
          'You are an expert code reviewer with deep knowledge of software engineering principles, design patterns, and best practices.',
        messages: [
          {
            role: 'user',
            content: prompt,
          },
        ],
      });

      return {
        content: response.content[0].text,
        model: response.model,
        usage: response.usage,
      };
    }
  } catch (error) {
    console.error(chalk.red(`Error sending prompt to Claude: ${error.message}`));
    throw error;
  }
}

export { sendPromptToClaude };
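For orientation, here is a minimal usage sketch of sendPromptToClaude with a JSON schema. It is hypothetical and not part of the package: the schema shape, prompt text, and variable names are illustrative, and it assumes ANTHROPIC_API_KEY is set in the environment or a .env file.

// Hypothetical usage sketch (not shipped with the package).
import { sendPromptToClaude } from './src/llm.js';

// Illustrative schema for a structured review result.
const reviewSchema = {
  type: 'object',
  properties: {
    summary: { type: 'string' },
    issues: {
      type: 'array',
      items: {
        type: 'object',
        properties: {
          file: { type: 'string' },
          severity: { type: 'string' },
          message: { type: 'string' },
        },
        required: ['file', 'message'],
      },
    },
  },
  required: ['summary', 'issues'],
};

// ANTHROPIC_API_KEY must be available before this call (see getAnthropicClient).
const result = await sendPromptToClaude('Review this diff: ...', {
  jsonSchema: reviewSchema,
  temperature: 0.2,
});

console.log(result.json.summary); // parsed object returned via the return_json tool call
console.log(result.usage);        // token usage reported by the API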
package/src/pr-history/analyzer.js
ADDED

@@ -0,0 +1,579 @@
/**
 * Main PR History Analyzer
 *
 * Orchestrates the complete PR comment history analysis workflow using
 * GitHub API client, comment processor, and database storage.
 */

import chalk from 'chalk';
import { PRCommentProcessor } from './comment-processor.js';
import { clearPRComments, getPRCommentsStats, getProcessedPRDateRange, shouldSkipPR, storePRCommentsBatch } from './database.js';
import { GitHubAPIClient } from './github-client.js';

/**
 * Progress tracking for PR analysis
 */
class PRAnalysisProgress {
  constructor(repository) {
    this.repository = repository;
    this.progress = {
      repository,
      total_prs: 0,
      processed_prs: 0,
      total_comments: 0,
      processed_comments: 0,
      failed_comments: 0,
      last_processed_pr: null,
      last_processed_page: 0,
      start_time: new Date().toISOString(),
      last_updated: new Date().toISOString(),
      errors: [],
      status: 'not_started', // 'not_started', 'in_progress', 'completed', 'failed'
    };
  }

  async save() {
    this.progress.last_updated = new Date().toISOString();
  }

  async load() {
    return false;
  }

  updatePRs(total, processed) {
    this.progress.total_prs = total;
    this.progress.processed_prs = processed;
  }

  updateComments(total, processed, failed = 0) {
    this.progress.total_comments = total;
    this.progress.processed_comments = processed;
    this.progress.failed_comments = failed;
  }

  setLastProcessed(prNumber, page = 0) {
    this.progress.last_processed_pr = prNumber;
    this.progress.last_processed_page = page;
  }

  addError(error, context = '') {
    this.progress.errors.push({
      error: error.message,
      context,
      timestamp: new Date().toISOString(),
    });
  }

  setStatus(status) {
    this.progress.status = status;
  }

  getProgressSummary() {
    return {
      repository: this.progress.repository,
      status: this.progress.status,
      prs: `${this.progress.processed_prs}/${this.progress.total_prs}`,
      comments: `${this.progress.processed_comments}/${this.progress.total_comments}`,
      failed_comments: this.progress.failed_comments,
      errors: this.progress.errors.length,
      elapsed: this.getElapsedTime(),
    };
  }

  getElapsedTime() {
    const start = new Date(this.progress.start_time);
    const now = new Date();
    const elapsed = now - start;
    const hours = Math.floor(elapsed / (1000 * 60 * 60));
    const minutes = Math.floor((elapsed % (1000 * 60 * 60)) / (1000 * 60));
    const seconds = Math.floor((elapsed % (1000 * 60)) / 1000);
    return `${hours}h ${minutes}m ${seconds}s`;
  }
}

/**
 * Main PR History Analyzer class
 */
export class PRHistoryAnalyzer {
  constructor(options = {}) {
    this.githubClient = null;
    this.commentProcessor = new PRCommentProcessor();
    this.progress = null;
    this.options = {
      concurrency: 2,
      batchSize: 50,
      skipDependabot: true,
      includeDrafts: false,
      ...options,
    };
  }

  /**
   * Initialize the analyzer with GitHub client
   * @param {string} token - GitHub API token
   */
  initialize(token) {
    this.githubClient = new GitHubAPIClient({
      token,
      requestTimeout: 30000,
      retries: 3,
      concurrency: this.options.concurrency,
    });
  }

  /**
   * Analyze PR comment history for a repository
   * @param {string} repository - Repository in format "owner/repo"
   * @param {Object} options - Analysis options
   * @returns {Promise<Object>} Analysis results
   */
  async analyzeRepository(repository, options = {}) {
    const {
      since = null,
      until = null,
      limit = null,
      resume = false,
      clearExisting = false,
      onProgress = null,
      projectPath = process.cwd(),
    } = options;

    // Initialize progress tracking
    this.progress = new PRAnalysisProgress(repository);

    // Load existing progress if resuming
    if (resume) {
      const loaded = await this.progress.load();
      if (loaded && this.progress.progress.status === 'completed') {
        console.log(chalk.green(`Analysis for ${repository} already completed.`));
        return await this.getAnalysisResults(repository, projectPath);
      }
    }

    // Clear existing data if requested
    if (clearExisting) {
      console.log(chalk.yellow(`Clearing existing PR comments for ${repository}...`));
      await clearPRComments(repository, projectPath);
    }

    try {
      this.progress.setStatus('in_progress');
      await this.progress.save();

      console.log(chalk.blue(`Starting PR comment analysis for ${repository}`));
      console.log(chalk.blue(`Options: concurrency=${this.options.concurrency}, batchSize=${this.options.batchSize}`));

      // Step 1: Fetch all merged PRs
      const prs = await this.fetchAllPRs(repository, { since, until, limit, resume, onProgress, projectPath });

      if (prs.length === 0) {
        console.log(chalk.yellow(`No merged PRs found for ${repository}`));
        this.progress.setStatus('completed');
        await this.progress.save();
        return { repository, total_prs: 0, total_comments: 0, patterns: [] };
      }

      console.log(chalk.green(`Found ${prs.length} merged PRs to analyze`));
      this.progress.updatePRs(prs.length, 0);

      // Step 2: Process PR comments
      const processedComments = await this.processPRComments(prs, { onProgress, projectPath });

      // Step 3: Store in database
      if (processedComments.length > 0) {
        console.log(chalk.blue(`Storing ${processedComments.length} processed comments in database...`));
        const storedCount = await storePRCommentsBatch(processedComments, projectPath);
        console.log(chalk.green(`Successfully stored ${storedCount} PR comments`));
      }

      // Step 4: Generate final results
      const results = await this.getAnalysisResults(repository, projectPath);

      this.progress.setStatus('completed');
      await this.progress.save();

      console.log(chalk.green(`Analysis completed for ${repository}`));
      console.log(chalk.green(`Processed ${results.total_prs} PRs with ${results.total_comments} comments`));

      return results;
    } catch (error) {
      console.error(chalk.red(`Error analyzing repository ${repository}: ${error.message}`));
      this.progress.addError(error, 'Repository analysis');
      this.progress.setStatus('failed');
      await this.progress.save();
      throw error;
    }
  }

  /**
   * Fetch all merged PRs from repository
   * @private
   * @param {string} repository - Repository in format "owner/repo"
   * @param {Object} options - Fetch options
   * @returns {Promise<Array>} Array of PRs
   */
  async fetchAllPRs(repository, options = {}) {
    const { since, until, limit, resume, onProgress, projectPath = process.cwd() } = options;
    const [owner, repo] = repository.split('/');

    console.log(chalk.blue(`Fetching merged PRs for ${repository}...`));

    try {
      const startPage = resume ? this.progress.progress.last_processed_page + 1 : 1;

      // Enable incremental updates by default unless explicit since/until dates are provided
      const shouldUseIncremental = !since && !until && !resume;

      const prs = await this.githubClient.fetchAllPRs(owner, repo, {
        since,
        until,
        limit,
        startPage,
        skipDependabot: this.options.skipDependabot,
        includeDrafts: this.options.includeDrafts,
        incremental: shouldUseIncremental,
        projectPath,
        onProgress: (pageProgress) => {
          this.progress.setLastProcessed(null, pageProgress.page);
          if (onProgress) {
            onProgress({
              stage: 'fetching_prs',
              current: pageProgress.page,
              total: pageProgress.estimatedPages || pageProgress.page,
              message: `Fetching PR page ${pageProgress.page}`,
            });
          }
        },
      });

      return prs.filter((pr) => pr.merged_at); // Ensure only merged PRs
    } catch (error) {
      console.error(chalk.red(`Error fetching PRs: ${error.message}`));
      this.progress.addError(error, 'Fetching PRs');
      throw error;
    }
  }

  /**
   * Process comments for all PRs
   * @private
   * @param {Array} prs - Array of PR objects
   * @param {Object} options - Processing options
   * @returns {Promise<Array>} Array of processed comments
   */
  async processPRComments(prs, options = {}) {
    const { onProgress, projectPath = process.cwd() } = options;
    const allProcessedComments = [];
    let totalComments = 0;
    let processedComments = 0;
    let failedComments = 0;

    console.log(chalk.blue(`Processing comments for ${prs.length} PRs...`));
    console.log(chalk.cyan(`This may take several minutes for large repositories...`));

    // Get processed PR date range to skip already processed PRs
    console.log(chalk.blue(`Checking for already processed PRs...`));
    const { oldestPR, newestPR } = await getProcessedPRDateRange(this.progress.repository, projectPath);

    let skippedPRs = 0;
    let prsToProcess = prs;

    if (oldestPR && newestPR) {
      console.log(chalk.blue(`Found processed PR range: ${oldestPR} to ${newestPR}`));
      prsToProcess = prs.filter((pr) => {
        const shouldSkip = shouldSkipPR(pr, oldestPR, newestPR);
        if (shouldSkip) {
          skippedPRs++;
        }
        return !shouldSkip;
      });
      console.log(chalk.green(`Skipping ${skippedPRs} already processed PRs, processing ${prsToProcess.length} new PRs`));
    } else {
      console.log(chalk.blue(`No previously processed PRs found, processing all ${prs.length} PRs`));
    }

    if (prsToProcess.length === 0) {
      console.log(chalk.yellow(`All PRs have already been processed!`));
      return allProcessedComments;
    }

    // First pass: count total comments for better progress tracking
    console.log(chalk.blue(`Counting total comments across ${prsToProcess.length} PRs to process...`));
    let estimatedComments = 0;
    for (let i = 0; i < Math.min(prsToProcess.length, 10); i++) {
      estimatedComments += (prsToProcess[i].comments || 0) + (prsToProcess[i].review_comments || 0);
    }
    const avgCommentsPerPR = estimatedComments / Math.min(prsToProcess.length, 10);
    const totalEstimatedComments = Math.floor(avgCommentsPerPR * prsToProcess.length);
    console.log(chalk.blue(`Estimated ${totalEstimatedComments} total comments to process`));

    // Process PRs in batches
    for (let i = 0; i < prsToProcess.length; i += this.options.batchSize) {
      const batch = prsToProcess.slice(i, i + this.options.batchSize);
      const batchNumber = Math.floor(i / this.options.batchSize) + 1;
      const totalBatches = Math.ceil(prsToProcess.length / this.options.batchSize);

      console.log(
        chalk.blue(
          `Processing PR batch ${batchNumber}/${totalBatches} (PRs ${i + 1}-${Math.min(i + this.options.batchSize, prsToProcess.length)})`
        )
      );

      const batchStartTime = Date.now();

      // Process PRs in parallel within batch
      const batchPromises = batch.map(async (pr, batchIndex) => {
        try {
          const prIndex = i + batchIndex;
          const prComments = await this.processSinglePR(pr);

          this.progress.setLastProcessed(pr.number);
          this.progress.updatePRs(prsToProcess.length, prIndex + 1);

          if (onProgress) {
            onProgress({
              stage: 'processing_comments',
              current: prIndex + 1,
              total: prsToProcess.length,
              message: `Processed PR #${pr.number} (${prComments.length} comments)`,
            });
          }

          return prComments;
        } catch (error) {
          console.error(chalk.red(`Error processing PR #${pr.number}: ${error.message}`));
          this.progress.addError(error, `PR #${pr.number}`);
          return [];
        }
      });

      // Wait for batch to complete
      const batchResults = await Promise.all(batchPromises);

      // Flatten and collect results
      let batchCommentCount = 0;
      for (const prComments of batchResults) {
        totalComments += prComments.length;
        const validComments = prComments.filter((comment) => comment !== null);
        processedComments += validComments.length;
        failedComments += prComments.length - validComments.length;
        allProcessedComments.push(...validComments);
        batchCommentCount += prComments.length;
      }

      const batchDuration = (Date.now() - batchStartTime) / 1000;
      console.log(
        chalk.blue(`Batch ${batchNumber}/${totalBatches} completed: ${batchCommentCount} comments in ${batchDuration.toFixed(1)}s`)
      );
      // Calculate progress percentage, handling case where totalEstimatedComments is 0
      const progressPercentage = totalEstimatedComments > 0 ? ((processedComments / totalEstimatedComments) * 100).toFixed(1) : 'unknown';

      const progressText =
        totalEstimatedComments > 0
          ? `Progress: ${processedComments}/${totalEstimatedComments} comments processed (${progressPercentage}%)`
          : `Progress: ${processedComments} comments processed`;

      console.log(chalk.blue(progressText));

      this.progress.updateComments(totalComments, processedComments, failedComments);
      await this.progress.save();

      // Small delay between batches to be gentle on APIs
      if (i + this.options.batchSize < prsToProcess.length) {
        await new Promise((resolve) => setTimeout(resolve, 1000));
      }
    }

    console.log(chalk.green(`Processed ${processedComments}/${totalComments} comments from ${prsToProcess.length} PRs`));
    if (skippedPRs > 0) {
      console.log(chalk.blue(`Skipped ${skippedPRs} already processed PRs`));
    }
    if (failedComments > 0) {
      console.log(chalk.yellow(`Failed to process ${failedComments} comments`));
    }

    return allProcessedComments;
  }

  /**
   * Process comments for a single PR
   * @private
   * @param {Object} pr - PR object
   * @returns {Promise<Array>} Array of processed comments
   */
  async processSinglePR(pr) {
    try {
      const [owner, repo] = this.progress.repository.split('/');

      // Fetch all types of comments for this PR
      const [reviewComments, issueComments, prFiles] = await Promise.all([
        this.githubClient.getPRReviewComments(owner, repo, pr.number),
        this.githubClient.getPRIssueComments(owner, repo, pr.number),
        this.githubClient.getPRFiles(owner, repo, pr.number),
      ]);

      // Combine all comments
      const allComments = [
        ...reviewComments.map((comment) => ({ ...comment, type: 'review' })),
        ...issueComments.map((comment) => ({ ...comment, type: 'issue' })),
      ];

      if (allComments.length === 0) {
        return [];
      }

      // Create PR context
      const prContext = {
        pr: {
          number: pr.number,
          repository: this.progress.repository,
        },
        files: prFiles,
      };

      // Process comments using comment processor
      const processedComments = await this.commentProcessor.processBatch(allComments, prContext);
      return processedComments;
    } catch (error) {
      console.error(chalk.red(`Error processing PR #${pr.number}: ${error.message}`));
      throw error;
    }
  }

  /**
   * Get analysis results from database
   * @private
   * @param {string} repository - Repository name
   * @param {string} projectPath - Project path for filtering (optional, defaults to cwd)
   * @returns {Promise<Object>} Analysis results
   */
  async getAnalysisResults(repository, projectPath = process.cwd()) {
    try {
      const stats = await getPRCommentsStats(repository, projectPath);

      // Ensure stats has the expected structure
      const safeStats = {
        total_comments: stats?.total_comments || 0,
        comment_types: stats?.comment_types || {},
        issue_categories: stats?.issue_categories || {},
        severity_levels: stats?.severity_levels || {},
        authors: stats?.authors || {},
        repositories: stats?.repositories || {},
      };

      // Extract patterns from statistics
      const patterns = [];

      // Add comment type patterns
      try {
        for (const [type, count] of Object.entries(safeStats.comment_types)) {
          patterns.push({
            type: 'comment_type',
            name: type,
            count,
            percentage: safeStats.total_comments > 0 ? ((count / safeStats.total_comments) * 100).toFixed(1) : '0.0',
          });
        }
      } catch (error) {
        console.warn(chalk.yellow(`Error processing comment type patterns: ${error.message}`));
      }

      // Add issue category patterns
      try {
        for (const [category, count] of Object.entries(safeStats.issue_categories)) {
          patterns.push({
            type: 'issue_category',
            name: category,
            count,
            percentage: safeStats.total_comments > 0 ? ((count / safeStats.total_comments) * 100).toFixed(1) : '0.0',
          });
        }
      } catch (error) {
        console.warn(chalk.yellow(`Error processing issue category patterns: ${error.message}`));
      }

      // Add severity patterns
      try {
        for (const [severity, count] of Object.entries(safeStats.severity_levels)) {
          patterns.push({
            type: 'severity',
            name: severity,
            count,
            percentage: safeStats.total_comments > 0 ? ((count / safeStats.total_comments) * 100).toFixed(1) : '0.0',
          });
        }
      } catch (error) {
        console.warn(chalk.yellow(`Error processing severity patterns: ${error.message}`));
      }

      // Calculate total PRs safely
      let totalPRs = 0;
      try {
        const repoValues = Object.values(safeStats.repositories);
        totalPRs = repoValues.length > 0 ? Math.max(...repoValues) : 0;
      } catch (error) {
        console.warn(chalk.yellow(`Error calculating total PRs: ${error.message}`));
        totalPRs = 0;
      }

      // Calculate top authors safely
      let topAuthors = [];
      try {
        topAuthors = Object.entries(safeStats.authors)
          .sort(([, a], [, b]) => b - a)
          .slice(0, 10)
          .map(([author, count]) => ({ author, count }));
      } catch (error) {
        console.warn(chalk.yellow(`Error calculating top authors: ${error.message}`));
        topAuthors = [];
      }

      return {
        repository,
        total_prs: totalPRs,
        total_comments: safeStats.total_comments,
        comment_types: safeStats.comment_types,
        issue_categories: safeStats.issue_categories,
        severity_levels: safeStats.severity_levels,
        top_authors: topAuthors,
        patterns,
        analysis_date: new Date().toISOString(),
      };
    } catch (error) {
      console.error(chalk.red(`Error getting analysis results: ${error.message}`));
      return {
        repository,
        total_prs: 0,
        total_comments: 0,
        patterns: [],
        error: error.message,
      };
    }
  }

  /**
   * Resume interrupted analysis
   * @param {string} repository - Repository name
   * @param {Object} options - Resume options
   * @returns {Promise<Object>} Analysis results
   */
  async resumeAnalysis(repository, options = {}) {
    return this.analyzeRepository(repository, { ...options, resume: true });
  }

  /**
   * Get progress status for repository
   * @param {string} repository - Repository name
   * @returns {Promise<Object>} Progress status
   */
  async getProgressStatus(repository) {
    const progress = new PRAnalysisProgress(repository);
    const loaded = await progress.load();

    if (!loaded) {
      return { repository, status: 'not_started' };
    }

    return progress.getProgressSummary();
  }
}
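A minimal, hypothetical driver for PRHistoryAnalyzer, not part of the package: the repository name, option values, and GITHUB_TOKEN environment variable are placeholders for illustration.

// Hypothetical usage sketch (not shipped with the package): wiring up the analyzer.
import { PRHistoryAnalyzer } from './src/pr-history/analyzer.js';

const analyzer = new PRHistoryAnalyzer({ concurrency: 2, batchSize: 25 });
analyzer.initialize(process.env.GITHUB_TOKEN); // GitHub API token, assumed to be set

const results = await analyzer.analyzeRepository('some-org/some-repo', {
  limit: 100, // placeholder cap on merged PRs to fetch
  onProgress: ({ stage, current, total, message }) => {
    console.log(`[${stage}] ${current}/${total} ${message}`);
  },
});

console.log(`Stored ${results.total_comments} comments from ${results.total_prs} PRs`);
console.log(results.patterns); // pattern summaries derived from the stored stats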