codecritique 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/README.md +82 -114
  2. package/package.json +10 -9
  3. package/src/content-retrieval.test.js +775 -0
  4. package/src/custom-documents.test.js +440 -0
  5. package/src/feedback-loader.test.js +529 -0
  6. package/src/llm.test.js +256 -0
  7. package/src/project-analyzer.test.js +747 -0
  8. package/src/rag-analyzer.js +12 -0
  9. package/src/rag-analyzer.test.js +1109 -0
  10. package/src/rag-review.test.js +317 -0
  11. package/src/setupTests.js +131 -0
  12. package/src/zero-shot-classifier-open.test.js +278 -0
  13. package/src/embeddings/cache-manager.js +0 -364
  14. package/src/embeddings/constants.js +0 -40
  15. package/src/embeddings/database.js +0 -921
  16. package/src/embeddings/errors.js +0 -208
  17. package/src/embeddings/factory.js +0 -447
  18. package/src/embeddings/file-processor.js +0 -851
  19. package/src/embeddings/model-manager.js +0 -337
  20. package/src/embeddings/similarity-calculator.js +0 -97
  21. package/src/embeddings/types.js +0 -113
  22. package/src/pr-history/analyzer.js +0 -579
  23. package/src/pr-history/bot-detector.js +0 -123
  24. package/src/pr-history/cli-utils.js +0 -204
  25. package/src/pr-history/comment-processor.js +0 -549
  26. package/src/pr-history/database.js +0 -819
  27. package/src/pr-history/github-client.js +0 -629
  28. package/src/technology-keywords.json +0 -753
  29. package/src/utils/command.js +0 -48
  30. package/src/utils/constants.js +0 -263
  31. package/src/utils/context-inference.js +0 -364
  32. package/src/utils/document-detection.js +0 -105
  33. package/src/utils/file-validation.js +0 -271
  34. package/src/utils/git.js +0 -232
  35. package/src/utils/language-detection.js +0 -170
  36. package/src/utils/logging.js +0 -24
  37. package/src/utils/markdown.js +0 -132
  38. package/src/utils/mobilebert-tokenizer.js +0 -141
  39. package/src/utils/pr-chunking.js +0 -276
  40. package/src/utils/string-utils.js +0 -28
@@ -1,579 +0,0 @@
1
- /**
2
- * Main PR History Analyzer
3
- *
4
- * Orchestrates the complete PR comment history analysis workflow using
5
- * GitHub API client, comment processor, and database storage.
6
- */
7
-
8
- import chalk from 'chalk';
9
- import { PRCommentProcessor } from './comment-processor.js';
10
- import { clearPRComments, getPRCommentsStats, getProcessedPRDateRange, shouldSkipPR, storePRCommentsBatch } from './database.js';
11
- import { GitHubAPIClient } from './github-client.js';
12
-
13
/**
 * In-memory progress tracker for one repository's PR analysis run.
 *
 * Keeps counters, the last processed PR/page, collected errors and a status
 * flag. Nothing is persisted: `save()` only refreshes the `last_updated`
 * timestamp and `load()` always reports that no earlier state was restored.
 */
class PRAnalysisProgress {
  /**
   * @param {string} repository - Repository identifier this tracker belongs to
   */
  constructor(repository) {
    this.repository = repository;
    this.progress = {
      repository,
      total_prs: 0,
      processed_prs: 0,
      total_comments: 0,
      processed_comments: 0,
      failed_comments: 0,
      last_processed_pr: null,
      last_processed_page: 0,
      start_time: new Date().toISOString(),
      last_updated: new Date().toISOString(),
      errors: [],
      status: 'not_started', // 'not_started', 'in_progress', 'completed', 'failed'
    };
  }

  /**
   * Mark the progress record as updated now. No storage is written.
   * @returns {Promise<void>}
   */
  async save() {
    this.progress.last_updated = new Date().toISOString();
  }

  /**
   * Attempt to restore earlier progress.
   * @returns {Promise<boolean>} Always `false`: there is nothing to restore from
   */
  async load() {
    return false;
  }

  /**
   * @param {number} total - Number of PRs in this run
   * @param {number} processed - Number of PRs handled so far
   */
  updatePRs(total, processed) {
    this.progress.total_prs = total;
    this.progress.processed_prs = processed;
  }

  /**
   * @param {number} total - Number of comments seen so far
   * @param {number} processed - Number of comments processed successfully
   * @param {number} [failed=0] - Number of comments that could not be processed
   */
  updateComments(total, processed, failed = 0) {
    this.progress.total_comments = total;
    this.progress.processed_comments = processed;
    this.progress.failed_comments = failed;
  }

  /**
   * @param {number|null} prNumber - Number of the PR handled most recently
   * @param {number} [page=0] - Listing page handled most recently
   */
  setLastProcessed(prNumber, page = 0) {
    this.progress.last_processed_pr = prNumber;
    this.progress.last_processed_page = page;
  }

  /**
   * Record a failure without interrupting the run.
   *
   * Accepts any thrown value, not just `Error` instances: JavaScript allows
   * `throw 'text'` or `throw null`, and this method is called from catch
   * handlers where a second exception would hide the original failure.
   *
   * @param {*} error - The thrown value
   * @param {string} [context=''] - Where the failure happened
   */
  addError(error, context = '') {
    const message = error instanceof Error ? error.message : String(error?.message ?? error);
    this.progress.errors.push({
      error: message,
      context,
      timestamp: new Date().toISOString(),
    });
  }

  /**
   * @param {string} status - One of 'not_started', 'in_progress', 'completed', 'failed'
   */
  setStatus(status) {
    this.progress.status = status;
  }

  /**
   * Build a compact, display-friendly snapshot of the current progress.
   * @returns {Object} Summary with "processed/total" strings, failure counts and elapsed time
   */
  getProgressSummary() {
    const { repository, status, processed_prs, total_prs, processed_comments, total_comments, failed_comments, errors } =
      this.progress;
    return {
      repository,
      status,
      prs: `${processed_prs}/${total_prs}`,
      comments: `${processed_comments}/${total_comments}`,
      failed_comments,
      errors: errors.length,
      elapsed: this.getElapsedTime(),
    };
  }

  /**
   * Time elapsed since this tracker was created.
   * @returns {string} Duration formatted as "<hours>h <minutes>m <seconds>s"
   */
  getElapsedTime() {
    const MS_PER_SECOND = 1000;
    const MS_PER_MINUTE = 60 * MS_PER_SECOND;
    const MS_PER_HOUR = 60 * MS_PER_MINUTE;

    const elapsed = new Date() - new Date(this.progress.start_time);
    const hours = Math.floor(elapsed / MS_PER_HOUR);
    const minutes = Math.floor((elapsed % MS_PER_HOUR) / MS_PER_MINUTE);
    const seconds = Math.floor((elapsed % MS_PER_MINUTE) / MS_PER_SECOND);
    return `${hours}h ${minutes}m ${seconds}s`;
  }
}
93
-
94
/**
 * Main PR History Analyzer class.
 *
 * Runs one analysis end to end: fetches merged PRs through the GitHub client,
 * turns their comments into processed records via the comment processor,
 * stores them with the database helpers and summarises the stored statistics.
 * `initialize(token)` must be called before any method that talks to GitHub.
 */
export class PRHistoryAnalyzer {
  /**
   * @param {Object} [options] - Overrides for the defaults below
   * @param {number} [options.concurrency=2] - Concurrency handed to the GitHub client
   * @param {number} [options.batchSize=50] - Number of PRs processed in parallel per batch
   * @param {boolean} [options.skipDependabot=true] - Forwarded to the GitHub client when listing PRs
   * @param {boolean} [options.includeDrafts=false] - Forwarded to the GitHub client when listing PRs
   */
  constructor(options = {}) {
    this.githubClient = null;
    this.commentProcessor = new PRCommentProcessor();
    this.progress = null;
    this.options = {
      concurrency: 2,
      batchSize: 50,
      skipDependabot: true,
      includeDrafts: false,
      ...options,
    };
  }

  /**
   * Initialize the analyzer with GitHub client
   * @param {string} token - GitHub API token
   */
  initialize(token) {
    this.githubClient = new GitHubAPIClient({
      token,
      requestTimeout: 30000,
      retries: 3,
      concurrency: this.options.concurrency,
    });
  }

  /**
   * Fail with a readable message when the GitHub client has not been created,
   * instead of a null-dereference TypeError deep inside a fetch call.
   * @private
   * @throws {Error} If `initialize(token)` has not been called
   */
  ensureInitialized() {
    if (!this.githubClient) {
      throw new Error('GitHub client is not initialized; call initialize(token) before running an analysis');
    }
  }

  /**
   * Analyze PR comment history for a repository
   * @param {string} repository - Repository in format "owner/repo"
   * @param {Object} options - Analysis options
   * @param {string|null} [options.since] - Lower date bound forwarded to the PR fetch
   * @param {string|null} [options.until] - Upper date bound forwarded to the PR fetch
   * @param {number|null} [options.limit] - Maximum number of PRs forwarded to the PR fetch
   * @param {boolean} [options.resume=false] - Try to continue from previously loaded progress
   * @param {boolean} [options.clearExisting=false] - Delete stored comments for the repository first
   * @param {Function|null} [options.onProgress] - Callback receiving `{ stage, current, total, message }`
   * @param {string} [options.projectPath=process.cwd()] - Project path passed to the database helpers
   * @returns {Promise<Object>} Analysis results
   * @throws {Error} Rethrows any failure after recording it and marking the run as 'failed'
   */
  async analyzeRepository(repository, options = {}) {
    const {
      since = null,
      until = null,
      limit = null,
      resume = false,
      clearExisting = false,
      onProgress = null,
      projectPath = process.cwd(),
    } = options;

    // Initialize progress tracking
    this.progress = new PRAnalysisProgress(repository);

    // Load existing progress if resuming
    if (resume) {
      const loaded = await this.progress.load();
      if (loaded && this.progress.progress.status === 'completed') {
        console.log(chalk.green(`Analysis for ${repository} already completed.`));
        return await this.getAnalysisResults(repository, projectPath);
      }
    }

    // Clear existing data if requested
    if (clearExisting) {
      console.log(chalk.yellow(`Clearing existing PR comments for ${repository}...`));
      await clearPRComments(repository, projectPath);
    }

    try {
      this.progress.setStatus('in_progress');
      await this.progress.save();

      console.log(chalk.blue(`Starting PR comment analysis for ${repository}`));
      console.log(chalk.blue(`Options: concurrency=${this.options.concurrency}, batchSize=${this.options.batchSize}`));

      // Step 1: Fetch all merged PRs
      const prs = await this.fetchAllPRs(repository, { since, until, limit, resume, onProgress, projectPath });

      if (prs.length === 0) {
        console.log(chalk.yellow(`No merged PRs found for ${repository}`));
        this.progress.setStatus('completed');
        await this.progress.save();
        return { repository, total_prs: 0, total_comments: 0, patterns: [] };
      }

      console.log(chalk.green(`Found ${prs.length} merged PRs to analyze`));
      this.progress.updatePRs(prs.length, 0);

      // Step 2: Process PR comments
      const processedComments = await this.processPRComments(prs, { onProgress, projectPath });

      // Step 3: Store in database
      if (processedComments.length > 0) {
        console.log(chalk.blue(`Storing ${processedComments.length} processed comments in database...`));
        const storedCount = await storePRCommentsBatch(processedComments, projectPath);
        console.log(chalk.green(`Successfully stored ${storedCount} PR comments`));
      }

      // Step 4: Generate final results
      const results = await this.getAnalysisResults(repository, projectPath);

      this.progress.setStatus('completed');
      await this.progress.save();

      console.log(chalk.green(`Analysis completed for ${repository}`));
      console.log(chalk.green(`Processed ${results.total_prs} PRs with ${results.total_comments} comments`));

      return results;
    } catch (error) {
      console.error(chalk.red(`Error analyzing repository ${repository}: ${error.message}`));
      this.progress.addError(error, 'Repository analysis');
      this.progress.setStatus('failed');
      await this.progress.save();
      throw error;
    }
  }

  /**
   * Fetch all merged PRs from repository
   * @private
   * @param {string} repository - Repository in format "owner/repo"
   * @param {Object} options - Fetch options
   * @returns {Promise<Array>} Array of PRs
   * @throws {Error} If the client is not initialized or the fetch fails
   */
  async fetchAllPRs(repository, options = {}) {
    const { since, until, limit, resume, onProgress, projectPath = process.cwd() } = options;
    const [owner, repo] = repository.split('/');

    console.log(chalk.blue(`Fetching merged PRs for ${repository}...`));

    try {
      this.ensureInitialized();

      const startPage = resume ? this.progress.progress.last_processed_page + 1 : 1;

      // Enable incremental updates by default unless explicit since/until dates are provided
      const shouldUseIncremental = !since && !until && !resume;

      const prs = await this.githubClient.fetchAllPRs(owner, repo, {
        since,
        until,
        limit,
        startPage,
        skipDependabot: this.options.skipDependabot,
        includeDrafts: this.options.includeDrafts,
        incremental: shouldUseIncremental,
        projectPath,
        onProgress: (pageProgress) => {
          this.progress.setLastProcessed(null, pageProgress.page);
          if (onProgress) {
            onProgress({
              stage: 'fetching_prs',
              current: pageProgress.page,
              total: pageProgress.estimatedPages || pageProgress.page,
              message: `Fetching PR page ${pageProgress.page}`,
            });
          }
        },
      });

      return prs.filter((pr) => pr.merged_at); // Ensure only merged PRs
    } catch (error) {
      console.error(chalk.red(`Error fetching PRs: ${error.message}`));
      this.progress.addError(error, 'Fetching PRs');
      throw error;
    }
  }

  /**
   * Process comments for all PRs
   *
   * PRs inside the already-processed date range are skipped. The rest are
   * handled in batches; every PR in a batch is processed in parallel and a
   * failing PR is recorded and contributes no comments instead of aborting
   * the run.
   *
   * @private
   * @param {Array} prs - Array of PR objects
   * @param {Object} options - Processing options
   * @returns {Promise<Array>} Array of processed comments
   */
  async processPRComments(prs, options = {}) {
    const { onProgress, projectPath = process.cwd() } = options;
    const allProcessedComments = [];
    let totalComments = 0;
    let processedComments = 0;
    let failedComments = 0;

    // A zero, negative or non-numeric batch size would keep the batch loop
    // from ever advancing, so fall back to the constructor default.
    const fallbackBatchSize = 50;
    const requestedBatchSize = Math.floor(Number(this.options.batchSize));
    const batchSize = requestedBatchSize >= 1 ? requestedBatchSize : fallbackBatchSize;

    console.log(chalk.blue(`Processing comments for ${prs.length} PRs...`));
    console.log(chalk.cyan(`This may take several minutes for large repositories...`));

    // Get processed PR date range to skip already processed PRs
    console.log(chalk.blue(`Checking for already processed PRs...`));
    const { oldestPR, newestPR } = await getProcessedPRDateRange(this.progress.repository, projectPath);

    let skippedPRs = 0;
    let prsToProcess = prs;

    if (oldestPR && newestPR) {
      console.log(chalk.blue(`Found processed PR range: ${oldestPR} to ${newestPR}`));
      prsToProcess = prs.filter((pr) => !shouldSkipPR(pr, oldestPR, newestPR));
      skippedPRs = prs.length - prsToProcess.length;
      console.log(chalk.green(`Skipping ${skippedPRs} already processed PRs, processing ${prsToProcess.length} new PRs`));
    } else {
      console.log(chalk.blue(`No previously processed PRs found, processing all ${prs.length} PRs`));
    }

    if (prsToProcess.length === 0) {
      console.log(chalk.yellow(`All PRs have already been processed!`));
      return allProcessedComments;
    }

    // Estimate the total comment count by extrapolating from the first few PRs
    console.log(chalk.blue(`Counting total comments across ${prsToProcess.length} PRs to process...`));
    const sample = prsToProcess.slice(0, 10);
    const sampledComments = sample.reduce((sum, pr) => sum + (pr.comments || 0) + (pr.review_comments || 0), 0);
    const avgCommentsPerPR = sampledComments / sample.length;
    const totalEstimatedComments = Math.floor(avgCommentsPerPR * prsToProcess.length);
    console.log(chalk.blue(`Estimated ${totalEstimatedComments} total comments to process`));

    // PRs in a batch finish in arbitrary order, so progress is reported from a
    // completion counter rather than from each PR's position in the list.
    let completedPRs = 0;
    const totalBatches = Math.ceil(prsToProcess.length / batchSize);

    // Process PRs in batches
    for (let i = 0; i < prsToProcess.length; i += batchSize) {
      const batch = prsToProcess.slice(i, i + batchSize);
      const batchNumber = Math.floor(i / batchSize) + 1;

      console.log(
        chalk.blue(`Processing PR batch ${batchNumber}/${totalBatches} (PRs ${i + 1}-${Math.min(i + batchSize, prsToProcess.length)})`)
      );

      const batchStartTime = Date.now();

      // Process PRs in parallel within batch
      const batchPromises = batch.map(async (pr) => {
        try {
          const prComments = await this.processSinglePR(pr);

          completedPRs += 1;
          this.progress.setLastProcessed(pr.number);
          this.progress.updatePRs(prsToProcess.length, completedPRs);

          if (onProgress) {
            onProgress({
              stage: 'processing_comments',
              current: completedPRs,
              total: prsToProcess.length,
              message: `Processed PR #${pr.number} (${prComments.length} comments)`,
            });
          }

          return prComments;
        } catch (error) {
          console.error(chalk.red(`Error processing PR #${pr.number}: ${error.message}`));
          this.progress.addError(error, `PR #${pr.number}`);
          return [];
        }
      });

      // Wait for batch to complete
      const batchResults = await Promise.all(batchPromises);

      // Flatten and collect results; a null entry marks a comment that failed processing
      let batchCommentCount = 0;
      for (const prComments of batchResults) {
        const validComments = prComments.filter((comment) => comment !== null);
        totalComments += prComments.length;
        processedComments += validComments.length;
        failedComments += prComments.length - validComments.length;
        batchCommentCount += prComments.length;
        allProcessedComments.push(...validComments);
      }

      const batchDuration = (Date.now() - batchStartTime) / 1000;
      console.log(
        chalk.blue(`Batch ${batchNumber}/${totalBatches} completed: ${batchCommentCount} comments in ${batchDuration.toFixed(1)}s`)
      );

      // Only show a percentage when there is an estimate to compare against
      let progressText = `Progress: ${processedComments} comments processed`;
      if (totalEstimatedComments > 0) {
        const progressPercentage = ((processedComments / totalEstimatedComments) * 100).toFixed(1);
        progressText = `Progress: ${processedComments}/${totalEstimatedComments} comments processed (${progressPercentage}%)`;
      }
      console.log(chalk.blue(progressText));

      this.progress.updateComments(totalComments, processedComments, failedComments);
      await this.progress.save();

      // Small delay between batches to be gentle on APIs
      if (i + batchSize < prsToProcess.length) {
        await new Promise((resolve) => setTimeout(resolve, 1000));
      }
    }

    console.log(chalk.green(`Processed ${processedComments}/${totalComments} comments from ${prsToProcess.length} PRs`));
    if (skippedPRs > 0) {
      console.log(chalk.blue(`Skipped ${skippedPRs} already processed PRs`));
    }
    if (failedComments > 0) {
      console.log(chalk.yellow(`Failed to process ${failedComments} comments`));
    }

    return allProcessedComments;
  }

  /**
   * Process comments for a single PR
   *
   * Fetches review comments, issue comments and the changed files in
   * parallel, tags each comment with its origin ('review' or 'issue') and
   * hands the combined list to the comment processor.
   *
   * @private
   * @param {Object} pr - PR object
   * @returns {Promise<Array>} Array of processed comments
   * @throws {Error} Rethrows fetch or processing failures after logging them
   */
  async processSinglePR(pr) {
    try {
      this.ensureInitialized();

      const [owner, repo] = this.progress.repository.split('/');

      // Fetch all types of comments for this PR
      const [reviewComments, issueComments, prFiles] = await Promise.all([
        this.githubClient.getPRReviewComments(owner, repo, pr.number),
        this.githubClient.getPRIssueComments(owner, repo, pr.number),
        this.githubClient.getPRFiles(owner, repo, pr.number),
      ]);

      // Combine all comments
      const allComments = [
        ...reviewComments.map((comment) => ({ ...comment, type: 'review' })),
        ...issueComments.map((comment) => ({ ...comment, type: 'issue' })),
      ];

      if (allComments.length === 0) {
        return [];
      }

      // Create PR context
      const prContext = {
        pr: {
          number: pr.number,
          repository: this.progress.repository,
        },
        files: prFiles,
      };

      // Process comments using comment processor
      return await this.commentProcessor.processBatch(allComments, prContext);
    } catch (error) {
      console.error(chalk.red(`Error processing PR #${pr.number}: ${error.message}`));
      throw error;
    }
  }

  /**
   * Get analysis results from database
   *
   * Never throws: if the statistics cannot be read, a minimal result with an
   * `error` message is returned instead.
   *
   * @private
   * @param {string} repository - Repository name
   * @param {string} projectPath - Project path for filtering (optional, defaults to cwd)
   * @returns {Promise<Object>} Analysis results
   */
  async getAnalysisResults(repository, projectPath = process.cwd()) {
    try {
      const stats = await getPRCommentsStats(repository, projectPath);

      // Ensure stats has the expected structure
      const safeStats = {
        total_comments: stats?.total_comments || 0,
        comment_types: stats?.comment_types || {},
        issue_categories: stats?.issue_categories || {},
        severity_levels: stats?.severity_levels || {},
        authors: stats?.authors || {},
        repositories: stats?.repositories || {},
      };

      // Turn one name->count map into pattern entries carrying their share of all comments
      const toPatterns = (type, counts, label) => {
        try {
          return Object.entries(counts).map(([name, count]) => ({
            type,
            name,
            count,
            percentage: safeStats.total_comments > 0 ? ((count / safeStats.total_comments) * 100).toFixed(1) : '0.0',
          }));
        } catch (error) {
          console.warn(chalk.yellow(`Error processing ${label} patterns: ${error.message}`));
          return [];
        }
      };

      // Extract patterns from statistics
      const patterns = [
        ...toPatterns('comment_type', safeStats.comment_types, 'comment type'),
        ...toPatterns('issue_category', safeStats.issue_categories, 'issue category'),
        ...toPatterns('severity', safeStats.severity_levels, 'severity'),
      ];

      // Calculate total PRs safely
      // NOTE(review): takes the largest value in `repositories`; assumes those values are per-repository PR counts - confirm against getPRCommentsStats
      let totalPRs = 0;
      try {
        const repoValues = Object.values(safeStats.repositories);
        totalPRs = repoValues.length > 0 ? Math.max(...repoValues) : 0;
      } catch (error) {
        console.warn(chalk.yellow(`Error calculating total PRs: ${error.message}`));
        totalPRs = 0;
      }

      // Calculate top authors safely: the ten authors with the highest counts
      let topAuthors = [];
      try {
        topAuthors = Object.entries(safeStats.authors)
          .sort(([, a], [, b]) => b - a)
          .slice(0, 10)
          .map(([author, count]) => ({ author, count }));
      } catch (error) {
        console.warn(chalk.yellow(`Error calculating top authors: ${error.message}`));
        topAuthors = [];
      }

      return {
        repository,
        total_prs: totalPRs,
        total_comments: safeStats.total_comments,
        comment_types: safeStats.comment_types,
        issue_categories: safeStats.issue_categories,
        severity_levels: safeStats.severity_levels,
        top_authors: topAuthors,
        patterns,
        analysis_date: new Date().toISOString(),
      };
    } catch (error) {
      console.error(chalk.red(`Error getting analysis results: ${error.message}`));
      return {
        repository,
        total_prs: 0,
        total_comments: 0,
        patterns: [],
        error: error.message,
      };
    }
  }

  /**
   * Resume interrupted analysis
   * @param {string} repository - Repository name
   * @param {Object} options - Resume options
   * @returns {Promise<Object>} Analysis results
   */
  async resumeAnalysis(repository, options = {}) {
    return this.analyzeRepository(repository, { ...options, resume: true });
  }

  /**
   * Get progress status for repository
   * @param {string} repository - Repository name
   * @returns {Promise<Object>} Progress status; `{ repository, status: 'not_started' }` when nothing could be loaded
   */
  async getProgressStatus(repository) {
    const progress = new PRAnalysisProgress(repository);
    const loaded = await progress.load();

    if (!loaded) {
      return { repository, status: 'not_started' };
    }

    return progress.getProgressSummary();
  }
}
@@ -1,123 +0,0 @@
1
- /**
2
- * Bot Detection Utility
3
- *
4
- * Detects and filters out bot comments from PR analysis.
5
- * Bots provide automated feedback that isn't useful for human review pattern analysis.
6
- */
7
-
8
/**
 * Regular expressions used to recognise bot accounts from their login name.
 * Every pattern is case-insensitive.
 */
const BOT_PATTERNS = {
  // Username patterns
  usernames: [
    // Generic bot markers
    /\[bot\]$/i, // e.g., sonarqubecloud[bot], dependabot[bot]
    /^bot-/i, // e.g., bot-reviewer
    /-bot$/i, // e.g., review-bot
    // Logins that begin with the name of a known automation tool
    ...[
      'dependabot',
      'renovate',
      'github-actions',
      'codecov',
      'sonarcloud',
      'sonarqube',
      'snyk',
      'greenkeeper',
      'semantic-release',
      'allcontributors',
      'stale',
      'mergify',
      'auto-merge',
      'ci-bot',
      'deploy-bot',
    ].map((prefix) => new RegExp(`^${prefix}`, 'i')),
  ],
};
34
-
35
/**
 * Exact logins that are always treated as bots.
 */
const KNOWN_BOTS = new Set([
  // Logins written as "<name>[bot]"
  ...[
    'dependabot',
    'renovate',
    'github-actions',
    'codecov',
    'sonarqubecloud',
    'sonarcloud',
    'snyk',
    'greenkeeper',
    'allcontributors',
    'stale',
    'mergify',
    'vercel',
    'netlify',
    'heroku',
    'circleci',
    'travis',
    'jenkins',
    'azure-pipelines',
    'gitpod',
    'codesandbox',
    'deepsource',
    'codeclimate',
    'codebeat',
    'codacy',
    'danger',
    'prettier',
    'eslint',
    'typescript',
  ].map((name) => `${name}[bot]`),
  // Logins without the "[bot]" suffix
  'semantic-release-bot',
  'auto-merge-bot',
  'ci-bot',
  'deploy-bot',
  'houndci-bot',
]);
73
-
74
/**
 * Check if a username indicates a bot account
 *
 * The name is trimmed and lower-cased once, and that normalised form is used
 * for both the exact-match lookup and the pattern check, so surrounding
 * whitespace or different casing cannot make the two checks disagree.
 *
 * @param {string} username - GitHub username to check
 * @returns {boolean} True if username appears to be a bot; false for empty or non-string input
 */
function isBotUsername(username) {
  if (!username || typeof username !== 'string') {
    return false;
  }

  const normalizedUsername = username.toLowerCase().trim();

  // Check exact matches first (most reliable)
  if (KNOWN_BOTS.has(username) || KNOWN_BOTS.has(normalizedUsername)) {
    return true;
  }

  // Check username patterns against the normalised name; the anchored
  // patterns (^... / ...$) would otherwise miss names padded with whitespace
  return BOT_PATTERNS.usernames.some((pattern) => pattern.test(normalizedUsername));
}
94
-
95
/**
 * Decide whether a comment was written by a bot, judging only by its author.
 *
 * The author name is taken from the first non-empty of `user.login`,
 * `author_login` and `author`; the comment text is not inspected.
 *
 * @param {Object} comment - Comment object carrying author information
 * @returns {boolean} True if the author looks like a bot; false when the comment or its author is missing
 */
function isBotComment(comment) {
  const login = comment ? comment.user?.login || comment.author_login || comment.author : undefined;
  if (!login) {
    return false;
  }
  return isBotUsername(login);
}
109
-
110
/**
 * Return the comments that were not written by bots.
 *
 * @param {Array<Object>} comments - Array of comment objects
 * @returns {Array<Object>} New array without bot comments; empty array when the input is not an array
 */
export function filterBotComments(comments) {
  return Array.isArray(comments) ? comments.filter((comment) => !isBotComment(comment)) : [];
}