codecritique 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +82 -114
- package/package.json +10 -9
- package/src/content-retrieval.test.js +775 -0
- package/src/custom-documents.test.js +440 -0
- package/src/feedback-loader.test.js +529 -0
- package/src/llm.test.js +256 -0
- package/src/project-analyzer.test.js +747 -0
- package/src/rag-analyzer.js +12 -0
- package/src/rag-analyzer.test.js +1109 -0
- package/src/rag-review.test.js +317 -0
- package/src/setupTests.js +131 -0
- package/src/zero-shot-classifier-open.test.js +278 -0
- package/src/embeddings/cache-manager.js +0 -364
- package/src/embeddings/constants.js +0 -40
- package/src/embeddings/database.js +0 -921
- package/src/embeddings/errors.js +0 -208
- package/src/embeddings/factory.js +0 -447
- package/src/embeddings/file-processor.js +0 -851
- package/src/embeddings/model-manager.js +0 -337
- package/src/embeddings/similarity-calculator.js +0 -97
- package/src/embeddings/types.js +0 -113
- package/src/pr-history/analyzer.js +0 -579
- package/src/pr-history/bot-detector.js +0 -123
- package/src/pr-history/cli-utils.js +0 -204
- package/src/pr-history/comment-processor.js +0 -549
- package/src/pr-history/database.js +0 -819
- package/src/pr-history/github-client.js +0 -629
- package/src/technology-keywords.json +0 -753
- package/src/utils/command.js +0 -48
- package/src/utils/constants.js +0 -263
- package/src/utils/context-inference.js +0 -364
- package/src/utils/document-detection.js +0 -105
- package/src/utils/file-validation.js +0 -271
- package/src/utils/git.js +0 -232
- package/src/utils/language-detection.js +0 -170
- package/src/utils/logging.js +0 -24
- package/src/utils/markdown.js +0 -132
- package/src/utils/mobilebert-tokenizer.js +0 -141
- package/src/utils/pr-chunking.js +0 -276
- package/src/utils/string-utils.js +0 -28
|
@@ -1,579 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Main PR History Analyzer
|
|
3
|
-
*
|
|
4
|
-
* Orchestrates the complete PR comment history analysis workflow using
|
|
5
|
-
* GitHub API client, comment processor, and database storage.
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
import chalk from 'chalk';
|
|
9
|
-
import { PRCommentProcessor } from './comment-processor.js';
|
|
10
|
-
import { clearPRComments, getPRCommentsStats, getProcessedPRDateRange, shouldSkipPR, storePRCommentsBatch } from './database.js';
|
|
11
|
-
import { GitHubAPIClient } from './github-client.js';
|
|
12
|
-
|
|
13
|
-
/**
 * Progress tracking for PR analysis.
 *
 * Holds counters, the last processed PR/page, a status flag and an error log
 * for one repository. All state lives in memory on `this.progress`; `save()`
 * only refreshes the `last_updated` timestamp and `load()` always reports that
 * nothing was loaded.
 */
class PRAnalysisProgress {
  /**
   * @param {string} repository - Repository identifier this tracker belongs to
   */
  constructor(repository) {
    const createdAt = new Date().toISOString();

    this.repository = repository;
    this.progress = {
      repository,
      total_prs: 0,
      processed_prs: 0,
      total_comments: 0,
      processed_comments: 0,
      failed_comments: 0,
      last_processed_pr: null,
      last_processed_page: 0,
      start_time: createdAt,
      last_updated: createdAt,
      errors: [],
      status: 'not_started', // 'not_started', 'in_progress', 'completed', 'failed'
    };
  }

  /**
   * Mark the progress record as updated "now".
   * @returns {Promise<void>}
   */
  async save() {
    this.progress.last_updated = new Date().toISOString();
  }

  /**
   * Attempt to load previously saved progress.
   * @returns {Promise<boolean>} Always `false` in this implementation
   */
  async load() {
    return false;
  }

  /**
   * @param {number} total - Total number of PRs to process
   * @param {number} processed - Number of PRs processed so far
   */
  updatePRs(total, processed) {
    this.progress.total_prs = total;
    this.progress.processed_prs = processed;
  }

  /**
   * @param {number} total - Total number of comments seen
   * @param {number} processed - Number of comments processed successfully
   * @param {number} [failed=0] - Number of comments that failed processing
   */
  updateComments(total, processed, failed = 0) {
    this.progress.total_comments = total;
    this.progress.processed_comments = processed;
    this.progress.failed_comments = failed;
  }

  /**
   * @param {number|null} prNumber - Number of the PR handled last
   * @param {number} [page=0] - Listing page handled last
   */
  setLastProcessed(prNumber, page = 0) {
    this.progress.last_processed_pr = prNumber;
    this.progress.last_processed_page = page;
  }

  /**
   * Append an entry to the error log.
   *
   * Anything can be thrown in JavaScript, so non-Error values (strings, null,
   * undefined) are recorded via `String(value)` instead of producing an
   * `undefined` message or throwing from inside an error handler.
   *
   * @param {*} error - Thrown value; its `message` is used when present
   * @param {string} [context=''] - Where the error happened
   */
  addError(error, context = '') {
    this.progress.errors.push({
      error: error?.message ?? String(error),
      context,
      timestamp: new Date().toISOString(),
    });
  }

  /**
   * @param {string} status - One of 'not_started', 'in_progress', 'completed', 'failed'
   */
  setStatus(status) {
    this.progress.status = status;
  }

  /**
   * Build a compact, display-oriented snapshot of the current progress.
   * @returns {Object} Summary with "processed/total" strings, error count and elapsed time
   */
  getProgressSummary() {
    const { repository, status, processed_prs, total_prs, processed_comments, total_comments, failed_comments, errors } =
      this.progress;

    return {
      repository,
      status,
      prs: `${processed_prs}/${total_prs}`,
      comments: `${processed_comments}/${total_comments}`,
      failed_comments,
      errors: errors.length,
      elapsed: this.getElapsedTime(),
    };
  }

  /**
   * Time elapsed since `start_time`, formatted as "<h>h <m>m <s>s".
   * @returns {string}
   */
  getElapsedTime() {
    const MS_PER_SECOND = 1000;
    const MS_PER_MINUTE = 60 * MS_PER_SECOND;
    const MS_PER_HOUR = 60 * MS_PER_MINUTE;

    const elapsed = new Date() - new Date(this.progress.start_time);
    const hours = Math.floor(elapsed / MS_PER_HOUR);
    const minutes = Math.floor((elapsed % MS_PER_HOUR) / MS_PER_MINUTE);
    const seconds = Math.floor((elapsed % MS_PER_MINUTE) / MS_PER_SECOND);
    return `${hours}h ${minutes}m ${seconds}s`;
  }
}
|
|
93
|
-
|
|
94
|
-
/**
 * Main PR History Analyzer class.
 *
 * Coordinates the workflow implemented below: fetch merged PRs through the
 * GitHub client, run each PR's comments through the comment processor, store
 * the processed comments, and summarize the stored statistics.
 */
export class PRHistoryAnalyzer {
  /**
   * @param {Object} [options] - Analyzer options, merged over the defaults
   * @param {number} [options.concurrency=2] - Passed to the GitHub client in initialize()
   * @param {number} [options.batchSize=50] - Number of PRs processed in parallel per batch
   * @param {boolean} [options.skipDependabot=true] - Forwarded to the GitHub client's PR fetch
   * @param {boolean} [options.includeDrafts=false] - Forwarded to the GitHub client's PR fetch
   */
  constructor(options = {}) {
    this.githubClient = null;
    this.commentProcessor = new PRCommentProcessor();
    this.progress = null;
    this.options = {
      concurrency: 2,
      batchSize: 50,
      skipDependabot: true,
      includeDrafts: false,
      ...options,
    };
  }

  /**
   * Initialize the analyzer with GitHub client
   * @param {string} token - GitHub API token
   */
  initialize(token) {
    this.githubClient = new GitHubAPIClient({
      token,
      requestTimeout: 30000,
      retries: 3,
      concurrency: this.options.concurrency,
    });
  }

  /**
   * Analyze PR comment history for a repository
   * @param {string} repository - Repository in format "owner/repo"
   * @param {Object} options - Analysis options
   * @param {string|null} [options.since] - Forwarded to the PR fetch
   * @param {string|null} [options.until] - Forwarded to the PR fetch
   * @param {number|null} [options.limit] - Forwarded to the PR fetch
   * @param {boolean} [options.resume=false] - Try to continue from loaded progress
   * @param {boolean} [options.clearExisting=false] - Clear stored comments for the repository first
   * @param {Function|null} [options.onProgress] - Callback receiving `{ stage, current, total, message }`
   * @param {string} [options.projectPath=process.cwd()] - Passed through to the database helpers
   * @returns {Promise<Object>} Analysis results
   * @throws {Error} When the GitHub client has not been initialized, or when any step fails
   */
  async analyzeRepository(repository, options = {}) {
    const {
      since = null,
      until = null,
      limit = null,
      resume = false,
      clearExisting = false,
      onProgress = null,
      projectPath = process.cwd(),
    } = options;

    // Initialize progress tracking
    this.progress = new PRAnalysisProgress(repository);

    // Load existing progress if resuming
    if (resume) {
      const loaded = await this.progress.load();
      if (loaded && this.progress.progress.status === 'completed') {
        console.log(chalk.green(`Analysis for ${repository} already completed.`));
        return await this.getAnalysisResults(repository, projectPath);
      }
    }

    // Fail fast BEFORE the destructive clear below: without a client the fetch
    // step cannot succeed, and the stored comments would already be gone.
    if (!this.githubClient) {
      throw new Error('GitHub client not initialized. Call initialize(token) before analyzeRepository().');
    }

    // Clear existing data if requested
    if (clearExisting) {
      console.log(chalk.yellow(`Clearing existing PR comments for ${repository}...`));
      await clearPRComments(repository, projectPath);
    }

    try {
      this.progress.setStatus('in_progress');
      await this.progress.save();

      console.log(chalk.blue(`Starting PR comment analysis for ${repository}`));
      console.log(chalk.blue(`Options: concurrency=${this.options.concurrency}, batchSize=${this.options.batchSize}`));

      // Step 1: Fetch all merged PRs
      const prs = await this.fetchAllPRs(repository, { since, until, limit, resume, onProgress, projectPath });

      if (prs.length === 0) {
        console.log(chalk.yellow(`No merged PRs found for ${repository}`));
        this.progress.setStatus('completed');
        await this.progress.save();
        return { repository, total_prs: 0, total_comments: 0, patterns: [] };
      }

      console.log(chalk.green(`Found ${prs.length} merged PRs to analyze`));
      this.progress.updatePRs(prs.length, 0);

      // Step 2: Process PR comments
      const processedComments = await this.processPRComments(prs, { onProgress, projectPath });

      // Step 3: Store in database
      if (processedComments.length > 0) {
        console.log(chalk.blue(`Storing ${processedComments.length} processed comments in database...`));
        const storedCount = await storePRCommentsBatch(processedComments, projectPath);
        console.log(chalk.green(`Successfully stored ${storedCount} PR comments`));
      }

      // Step 4: Generate final results
      const results = await this.getAnalysisResults(repository, projectPath);

      this.progress.setStatus('completed');
      await this.progress.save();

      console.log(chalk.green(`Analysis completed for ${repository}`));
      console.log(chalk.green(`Processed ${results.total_prs} PRs with ${results.total_comments} comments`));

      return results;
    } catch (error) {
      console.error(chalk.red(`Error analyzing repository ${repository}: ${error.message}`));
      this.progress.addError(error, 'Repository analysis');
      this.progress.setStatus('failed');
      await this.progress.save();
      throw error;
    }
  }

  /**
   * Fetch all merged PRs from repository
   * @private
   * @param {string} repository - Repository in format "owner/repo"
   * @param {Object} options - Fetch options
   * @returns {Promise<Array>} Array of PRs (only those with a truthy `merged_at`)
   */
  async fetchAllPRs(repository, options = {}) {
    const { since, until, limit, resume, onProgress, projectPath = process.cwd() } = options;
    const [owner, repo] = repository.split('/');

    console.log(chalk.blue(`Fetching merged PRs for ${repository}...`));

    try {
      const startPage = resume ? this.progress.progress.last_processed_page + 1 : 1;

      // Enable incremental updates by default unless explicit since/until dates are provided
      const shouldUseIncremental = !since && !until && !resume;

      const prs = await this.githubClient.fetchAllPRs(owner, repo, {
        since,
        until,
        limit,
        startPage,
        skipDependabot: this.options.skipDependabot,
        includeDrafts: this.options.includeDrafts,
        incremental: shouldUseIncremental,
        projectPath,
        onProgress: (pageProgress) => {
          // Remember the last listing page so a resumed run can start after it.
          this.progress.setLastProcessed(null, pageProgress.page);
          if (onProgress) {
            onProgress({
              stage: 'fetching_prs',
              current: pageProgress.page,
              total: pageProgress.estimatedPages || pageProgress.page,
              message: `Fetching PR page ${pageProgress.page}`,
            });
          }
        },
      });

      return prs.filter((pr) => pr.merged_at); // Ensure only merged PRs
    } catch (error) {
      console.error(chalk.red(`Error fetching PRs: ${error.message}`));
      this.progress.addError(error, 'Fetching PRs');
      throw error;
    }
  }

  /**
   * Process comments for all PRs
   *
   * PRs are handled in batches of `options.batchSize`; all PRs of one batch run
   * in parallel and a one second pause is inserted between batches. A PR that
   * fails is recorded on the progress tracker and contributes no comments.
   *
   * @private
   * @param {Array} prs - Array of PR objects
   * @param {Object} options - Processing options
   * @returns {Promise<Array>} Array of processed comments
   */
  async processPRComments(prs, options = {}) {
    const { onProgress, projectPath = process.cwd() } = options;
    const allProcessedComments = [];
    let totalComments = 0;
    let processedComments = 0;
    let failedComments = 0;

    // A zero, negative or non-numeric batch size would keep the batch loop from
    // ever advancing; fall back to the constructor default in that case.
    const requestedBatchSize = Math.floor(Number(this.options.batchSize));
    const batchSize = Number.isFinite(requestedBatchSize) && requestedBatchSize > 0 ? requestedBatchSize : 50;

    console.log(chalk.blue(`Processing comments for ${prs.length} PRs...`));
    console.log(chalk.cyan(`This may take several minutes for large repositories...`));

    // Get processed PR date range to skip already processed PRs
    console.log(chalk.blue(`Checking for already processed PRs...`));
    const { oldestPR, newestPR } = await getProcessedPRDateRange(this.progress.repository, projectPath);

    let skippedPRs = 0;
    let prsToProcess = prs;

    if (oldestPR && newestPR) {
      console.log(chalk.blue(`Found processed PR range: ${oldestPR} to ${newestPR}`));
      prsToProcess = prs.filter((pr) => {
        const shouldSkip = shouldSkipPR(pr, oldestPR, newestPR);
        if (shouldSkip) {
          skippedPRs++;
        }
        return !shouldSkip;
      });
      console.log(chalk.green(`Skipping ${skippedPRs} already processed PRs, processing ${prsToProcess.length} new PRs`));
    } else {
      console.log(chalk.blue(`No previously processed PRs found, processing all ${prs.length} PRs`));
    }

    if (prsToProcess.length === 0) {
      console.log(chalk.yellow(`All PRs have already been processed!`));
      return allProcessedComments;
    }

    // First pass: estimate total comments from a sample of up to 10 PRs
    console.log(chalk.blue(`Counting total comments across ${prsToProcess.length} PRs to process...`));
    const sampleSize = Math.min(prsToProcess.length, 10);
    let estimatedComments = 0;
    for (let i = 0; i < sampleSize; i++) {
      estimatedComments += (prsToProcess[i].comments || 0) + (prsToProcess[i].review_comments || 0);
    }
    const avgCommentsPerPR = estimatedComments / sampleSize;
    const totalEstimatedComments = Math.floor(avgCommentsPerPR * prsToProcess.length);
    console.log(chalk.blue(`Estimated ${totalEstimatedComments} total comments to process`));

    const totalBatches = Math.ceil(prsToProcess.length / batchSize);

    // PRs inside a batch finish in arbitrary order, so progress is reported from
    // a running count of completed PRs rather than from each PR's index; this
    // keeps the reported value from ever moving backwards.
    let completedPRs = 0;

    // Process PRs in batches
    for (let i = 0; i < prsToProcess.length; i += batchSize) {
      const batch = prsToProcess.slice(i, i + batchSize);
      const batchNumber = Math.floor(i / batchSize) + 1;

      console.log(
        chalk.blue(`Processing PR batch ${batchNumber}/${totalBatches} (PRs ${i + 1}-${Math.min(i + batchSize, prsToProcess.length)})`)
      );

      const batchStartTime = Date.now();

      // Process PRs in parallel within batch
      const batchPromises = batch.map(async (pr) => {
        try {
          const prComments = await this.processSinglePR(pr);

          completedPRs += 1;
          this.progress.setLastProcessed(pr.number);
          this.progress.updatePRs(prsToProcess.length, completedPRs);

          if (onProgress) {
            onProgress({
              stage: 'processing_comments',
              current: completedPRs,
              total: prsToProcess.length,
              message: `Processed PR #${pr.number} (${prComments.length} comments)`,
            });
          }

          return prComments;
        } catch (error) {
          // processSinglePR has already logged this failure; only record it here
          // so the same message is not printed twice.
          this.progress.addError(error, `PR #${pr.number}`);
          return [];
        }
      });

      // Wait for batch to complete
      const batchResults = await Promise.all(batchPromises);

      // Flatten and collect results; null entries count as failed comments
      let batchCommentCount = 0;
      for (const prComments of batchResults) {
        totalComments += prComments.length;
        const validComments = prComments.filter((comment) => comment !== null);
        processedComments += validComments.length;
        failedComments += prComments.length - validComments.length;
        allProcessedComments.push(...validComments);
        batchCommentCount += prComments.length;
      }

      const batchDuration = (Date.now() - batchStartTime) / 1000;
      console.log(
        chalk.blue(`Batch ${batchNumber}/${totalBatches} completed: ${batchCommentCount} comments in ${batchDuration.toFixed(1)}s`)
      );

      // Only show a percentage when an estimate is available (it may be 0)
      const progressText =
        totalEstimatedComments > 0
          ? `Progress: ${processedComments}/${totalEstimatedComments} comments processed (${(
              (processedComments / totalEstimatedComments) *
              100
            ).toFixed(1)}%)`
          : `Progress: ${processedComments} comments processed`;

      console.log(chalk.blue(progressText));

      this.progress.updateComments(totalComments, processedComments, failedComments);
      await this.progress.save();

      // Small delay between batches to be gentle on APIs
      if (i + batchSize < prsToProcess.length) {
        await new Promise((resolve) => setTimeout(resolve, 1000));
      }
    }

    console.log(chalk.green(`Processed ${processedComments}/${totalComments} comments from ${prsToProcess.length} PRs`));
    if (skippedPRs > 0) {
      console.log(chalk.blue(`Skipped ${skippedPRs} already processed PRs`));
    }
    if (failedComments > 0) {
      console.log(chalk.yellow(`Failed to process ${failedComments} comments`));
    }

    return allProcessedComments;
  }

  /**
   * Process comments for a single PR
   *
   * Fetches review comments, issue comments and changed files in parallel, tags
   * each comment with its `type` ('review' or 'issue') and hands them to the
   * comment processor together with the PR context.
   *
   * @private
   * @param {Object} pr - PR object
   * @returns {Promise<Array>} Array of processed comments
   * @throws Rethrows any fetch/processing error after logging it
   */
  async processSinglePR(pr) {
    try {
      const [owner, repo] = this.progress.repository.split('/');

      // Fetch all types of comments for this PR
      const [reviewComments, issueComments, prFiles] = await Promise.all([
        this.githubClient.getPRReviewComments(owner, repo, pr.number),
        this.githubClient.getPRIssueComments(owner, repo, pr.number),
        this.githubClient.getPRFiles(owner, repo, pr.number),
      ]);

      // Combine all comments
      const allComments = [
        ...reviewComments.map((comment) => ({ ...comment, type: 'review' })),
        ...issueComments.map((comment) => ({ ...comment, type: 'issue' })),
      ];

      if (allComments.length === 0) {
        return [];
      }

      // Create PR context
      const prContext = {
        pr: {
          number: pr.number,
          repository: this.progress.repository,
        },
        files: prFiles,
      };

      // Process comments using comment processor
      return await this.commentProcessor.processBatch(allComments, prContext);
    } catch (error) {
      console.error(chalk.red(`Error processing PR #${pr.number}: ${error.message}`));
      throw error;
    }
  }

  /**
   * Get analysis results from database
   *
   * Never rejects: when the statistics cannot be read, a zeroed result carrying
   * an `error` message is returned instead.
   *
   * @private
   * @param {string} repository - Repository name
   * @param {string} projectPath - Project path for filtering (optional, defaults to cwd)
   * @returns {Promise<Object>} Analysis results
   */
  async getAnalysisResults(repository, projectPath = process.cwd()) {
    try {
      const stats = await getPRCommentsStats(repository, projectPath);

      // Ensure stats has the expected structure
      const safeStats = {
        total_comments: stats?.total_comments || 0,
        comment_types: stats?.comment_types || {},
        issue_categories: stats?.issue_categories || {},
        severity_levels: stats?.severity_levels || {},
        authors: stats?.authors || {},
        repositories: stats?.repositories || {},
      };

      // Extract patterns from statistics
      const patterns = [];

      // Turns one "name -> count" map into pattern entries. A failure is only
      // warned about; entries appended before the failure are kept.
      const appendPatterns = (type, label, counts) => {
        try {
          for (const [name, count] of Object.entries(counts)) {
            patterns.push({
              type,
              name,
              count,
              percentage: safeStats.total_comments > 0 ? ((count / safeStats.total_comments) * 100).toFixed(1) : '0.0',
            });
          }
        } catch (error) {
          console.warn(chalk.yellow(`Error processing ${label} patterns: ${error.message}`));
        }
      };

      appendPatterns('comment_type', 'comment type', safeStats.comment_types);
      appendPatterns('issue_category', 'issue category', safeStats.issue_categories);
      appendPatterns('severity', 'severity', safeStats.severity_levels);

      // Calculate total PRs safely (largest value in the repositories map)
      let totalPRs = 0;
      try {
        const repoValues = Object.values(safeStats.repositories);
        totalPRs = repoValues.length > 0 ? Math.max(...repoValues) : 0;
      } catch (error) {
        console.warn(chalk.yellow(`Error calculating total PRs: ${error.message}`));
        totalPRs = 0;
      }

      // Calculate top authors safely (ten highest counts, descending)
      let topAuthors = [];
      try {
        topAuthors = Object.entries(safeStats.authors)
          .sort(([, a], [, b]) => b - a)
          .slice(0, 10)
          .map(([author, count]) => ({ author, count }));
      } catch (error) {
        console.warn(chalk.yellow(`Error calculating top authors: ${error.message}`));
        topAuthors = [];
      }

      return {
        repository,
        total_prs: totalPRs,
        total_comments: safeStats.total_comments,
        comment_types: safeStats.comment_types,
        issue_categories: safeStats.issue_categories,
        severity_levels: safeStats.severity_levels,
        top_authors: topAuthors,
        patterns,
        analysis_date: new Date().toISOString(),
      };
    } catch (error) {
      console.error(chalk.red(`Error getting analysis results: ${error.message}`));
      return {
        repository,
        total_prs: 0,
        total_comments: 0,
        patterns: [],
        error: error.message,
      };
    }
  }

  /**
   * Resume interrupted analysis
   * @param {string} repository - Repository name
   * @param {Object} options - Resume options (same as analyzeRepository; `resume` is forced to true)
   * @returns {Promise<Object>} Analysis results
   */
  async resumeAnalysis(repository, options = {}) {
    return this.analyzeRepository(repository, { ...options, resume: true });
  }

  /**
   * Get progress status for repository
   * @param {string} repository - Repository name
   * @returns {Promise<Object>} Progress summary, or `{ repository, status: 'not_started' }` when nothing could be loaded
   */
  async getProgressStatus(repository) {
    const progress = new PRAnalysisProgress(repository);
    const loaded = await progress.load();

    if (!loaded) {
      return { repository, status: 'not_started' };
    }

    return progress.getProgressSummary();
  }
}
|
|
@@ -1,123 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Bot Detection Utility
|
|
3
|
-
*
|
|
4
|
-
* Detects and filters out bot comments from PR analysis.
|
|
5
|
-
* Bots provide automated feedback that isn't useful for human review pattern analysis.
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
/**
 * Regular expressions that flag a GitHub username as belonging to a bot.
 * Every pattern is case-insensitive; a username matching any one of them is
 * treated as a bot.
 */
const BOT_PATTERNS = {
  usernames: [
    // --- Generic naming conventions ---
    /\[bot\]$/i, // "[bot]" suffix, e.g. sonarqubecloud[bot], dependabot[bot]
    /^bot-/i, // "bot-" prefix, e.g. bot-reviewer
    /-bot$/i, // "-bot" suffix, e.g. review-bot

    // --- Dependency update tools ---
    /^dependabot/i,
    /^renovate/i,

    // --- CI, coverage and code-quality services ---
    /^github-actions/i,
    /^codecov/i,
    /^sonarcloud/i,
    /^sonarqube/i,
    /^snyk/i,

    // --- Repository maintenance automation ---
    /^greenkeeper/i,
    /^semantic-release/i,
    /^allcontributors/i,
    /^stale/i,
    /^mergify/i,
    /^auto-merge/i,

    // --- Generic CI / deployment accounts ---
    /^ci-bot/i,
    /^deploy-bot/i,
  ],
};
|
|
34
|
-
|
|
35
|
-
/**
 * Exact usernames of well-known bot accounts.
 * All entries are lowercase so a lowercased username can be looked up directly.
 */
const KNOWN_BOTS = new Set([
  // Dependency updates, CI and analysis
  'dependabot[bot]',
  'renovate[bot]',
  'github-actions[bot]',
  'codecov[bot]',
  'sonarqubecloud[bot]',
  'sonarcloud[bot]',
  'snyk[bot]',
  'greenkeeper[bot]',

  // Release and repository housekeeping
  'semantic-release-bot',
  'allcontributors[bot]',
  'stale[bot]',
  'mergify[bot]',
  'auto-merge-bot',
  'ci-bot',
  'deploy-bot',

  // Hosting, build and workspace platforms
  'vercel[bot]',
  'netlify[bot]',
  'heroku[bot]',
  'circleci[bot]',
  'travis[bot]',
  'jenkins[bot]',
  'azure-pipelines[bot]',
  'gitpod[bot]',
  'codesandbox[bot]',

  // Code review, linting and formatting
  'deepsource[bot]',
  'codeclimate[bot]',
  'codebeat[bot]',
  'codacy[bot]',
  'houndci-bot',
  'danger[bot]',
  'prettier[bot]',
  'eslint[bot]',
  'typescript[bot]',
]);
|
|
73
|
-
|
|
74
|
-
/**
 * Check if a username indicates a bot account
 *
 * A username counts as a bot when it is listed in KNOWN_BOTS (as given, or
 * trimmed and lowercased) or when it matches one of BOT_PATTERNS.usernames.
 * Surrounding whitespace is ignored for both checks, so the anchored patterns
 * (`^bot-`, `[bot]$`, ...) still apply to padded input.
 *
 * @param {string} username - GitHub username to check
 * @returns {boolean} True if username appears to be a bot; false for empty or non-string input
 */
function isBotUsername(username) {
  if (!username || typeof username !== 'string') {
    return false;
  }

  const trimmedUsername = username.trim();
  const normalizedUsername = trimmedUsername.toLowerCase();

  // Check exact matches first (most reliable)
  if (KNOWN_BOTS.has(username) || KNOWN_BOTS.has(normalizedUsername)) {
    return true;
  }

  // Patterns carry the `i` flag, so only trimming is needed before matching.
  return BOT_PATTERNS.usernames.some((pattern) => pattern.test(trimmedUsername));
}
|
|
94
|
-
|
|
95
|
-
/**
 * Decide whether a comment was written by a bot, judging by its author name only.
 *
 * The author name is taken from the first truthy value among
 * `comment.user.login`, `comment.author_login` and `comment.author`.
 *
 * @param {Object} comment - Comment object
 * @returns {boolean} True if the author name is classified as a bot; false when
 *   the comment or its author name is missing
 */
function isBotComment(comment) {
  // The comment body is deliberately not inspected.
  const authorName = comment ? comment.user?.login || comment.author_login || comment.author : null;

  if (!authorName) {
    return false;
  }

  return isBotUsername(authorName);
}
|
|
109
|
-
|
|
110
|
-
/**
 * Return the comments that were not written by bots.
 *
 * @param {Array<Object>} comments - Array of comment objects
 * @returns {Array<Object>} New array without bot comments; an empty array when
 *   the input is not an array
 */
export function filterBotComments(comments) {
  return Array.isArray(comments) ? comments.filter((entry) => !isBotComment(entry)) : [];
}
|