codecritique 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40):
  1. package/README.md +82 -114
  2. package/package.json +10 -9
  3. package/src/content-retrieval.test.js +775 -0
  4. package/src/custom-documents.test.js +440 -0
  5. package/src/feedback-loader.test.js +529 -0
  6. package/src/llm.test.js +256 -0
  7. package/src/project-analyzer.test.js +747 -0
  8. package/src/rag-analyzer.js +12 -0
  9. package/src/rag-analyzer.test.js +1109 -0
  10. package/src/rag-review.test.js +317 -0
  11. package/src/setupTests.js +131 -0
  12. package/src/zero-shot-classifier-open.test.js +278 -0
  13. package/src/embeddings/cache-manager.js +0 -364
  14. package/src/embeddings/constants.js +0 -40
  15. package/src/embeddings/database.js +0 -921
  16. package/src/embeddings/errors.js +0 -208
  17. package/src/embeddings/factory.js +0 -447
  18. package/src/embeddings/file-processor.js +0 -851
  19. package/src/embeddings/model-manager.js +0 -337
  20. package/src/embeddings/similarity-calculator.js +0 -97
  21. package/src/embeddings/types.js +0 -113
  22. package/src/pr-history/analyzer.js +0 -579
  23. package/src/pr-history/bot-detector.js +0 -123
  24. package/src/pr-history/cli-utils.js +0 -204
  25. package/src/pr-history/comment-processor.js +0 -549
  26. package/src/pr-history/database.js +0 -819
  27. package/src/pr-history/github-client.js +0 -629
  28. package/src/technology-keywords.json +0 -753
  29. package/src/utils/command.js +0 -48
  30. package/src/utils/constants.js +0 -263
  31. package/src/utils/context-inference.js +0 -364
  32. package/src/utils/document-detection.js +0 -105
  33. package/src/utils/file-validation.js +0 -271
  34. package/src/utils/git.js +0 -232
  35. package/src/utils/language-detection.js +0 -170
  36. package/src/utils/logging.js +0 -24
  37. package/src/utils/markdown.js +0 -132
  38. package/src/utils/mobilebert-tokenizer.js +0 -141
  39. package/src/utils/pr-chunking.js +0 -276
  40. package/src/utils/string-utils.js +0 -28
@@ -1,629 +0,0 @@
1
- /**
2
- * GitHub API Client for PR History Analysis
3
- *
4
- * Provides comprehensive GitHub API integration for fetching complete PR history
5
- * with intelligent pagination, filtering, and rate limiting for large repositories.
6
- */
7
-
8
- import fs from 'node:fs/promises';
9
- import path from 'node:path';
10
- import { Octokit } from '@octokit/rest';
11
- import chalk from 'chalk';
12
- import { getLastAnalysisTimestamp } from './database.js';
13
-
14
- // Configuration constants shared by GitHubAPIClient
15
- const DEFAULT_PER_PAGE = 100; // Page size requested from pulls.list; a shorter page is treated as the end of the list
16
- const MAX_RETRIES = 3; // Default total number of attempts per call in callWithRetry
17
- const BASE_RETRY_DELAY = 1000; // 1 second (ms); base of the exponential backoff and minimum wait after a 429
18
- const RATE_LIMIT_BUFFER = 100; // Keep 100 requests in reserve; respectRateLimit waits when fewer remain
19
- const PROGRESS_SAVE_INTERVAL = 10; // Save progress every 10 pages (compared against the page number)
20
-
21
/**
 * GitHub API client used to collect merged pull requests and their details
 * (comments, reviews, changed files) for PR history analysis.
 *
 * Features: retry with backoff, rate-limit awareness, optional progress
 * persistence for resuming long fetches, and optional incremental ranges
 * based on the last recorded analysis timestamp.
 */
export class GitHubAPIClient {
  /**
   * @param {object} [options]
   * @param {string} [options.token] - GitHub token; falls back to `process.env.GITHUB_TOKEN`.
   * @param {object} [options.octokit] - Pre-built Octokit-compatible instance (e.g. a test double).
   *   When supplied, token validation is skipped and no Octokit is constructed.
   * @param {Function} [options.progressCallback] - Invoked after every fetched page of PRs.
   * @param {string} [options.resumeFile] - Path of the JSON file used to save/load progress.
   * @param {boolean} [options.debug=false] - When false, `debug`-level log messages are suppressed.
   */
  constructor(options = {}) {
    this.token = options.token || process.env.GITHUB_TOKEN;

    // Only validate the token when we are going to build a real Octokit ourselves.
    if (!options.octokit) {
      this.validateToken();
    }

    this.octokit =
      options.octokit ||
      new Octokit({
        auth: this.token,
        userAgent: 'codecritique-pr-history/1.0.0',
        request: {
          timeout: 30000, // 30 second timeout
        },
      });

    this.rateLimitInfo = null;
    this.progressCallback = options.progressCallback || null;
    this.resumeFile = options.resumeFile || null;
    this.debug = options.debug || false;
  }

  /**
   * Ensure a token is present and warn (without failing) if it does not look
   * like a GitHub token.
   *
   * @throws {Error} When no token is configured.
   */
  validateToken() {
    if (!this.token) {
      throw new Error('GitHub token required. Set GITHUB_TOKEN environment variable or pass token option.');
    }

    // Accept every documented prefix: ghp_/gho_/ghu_/ghs_/ghr_ and fine-grained github_pat_ tokens.
    const knownTokenFormat = /^(?:gh[pousr]_[A-Za-z0-9]{36,}|github_pat_[A-Za-z0-9_]{22,})$/;
    if (!knownTokenFormat.test(String(this.token))) {
      console.warn(chalk.yellow('Warning: Token format appears invalid. Expected GitHub token format.'));
    }
  }

  /**
   * Verify that the token can read the given repository.
   *
   * @param {string} owner
   * @param {string} repo
   * @returns {Promise<boolean>} `true` when the repository is readable.
   * @throws {Error} A descriptive error for 403/404; any other error is rethrown unchanged.
   */
  async testTokenPermissions(owner, repo) {
    try {
      await this.octokit.repos.get({ owner, repo });
      this.log(`✓ Token has access to ${owner}/${repo}`);
      return true;
    } catch (error) {
      if (error.status === 403) {
        throw new Error(`Token lacks permission to access repository ${owner}/${repo}`, { cause: error });
      }
      if (error.status === 404) {
        throw new Error(`Repository ${owner}/${repo} not found or not accessible`, { cause: error });
      }
      throw error;
    }
  }

  /**
   * Look up when this repository was last analysed, to enable incremental updates.
   *
   * @param {string} owner
   * @param {string} repo
   * @param {string} projectPath
   * @returns {Promise<Date|null>} The last analysis date, or `null` when unknown, unparsable or on lookup failure.
   */
  async getLastAnalysisDate(owner, repo, projectPath) {
    try {
      const lastTimestamp = await getLastAnalysisTimestamp(`${owner}/${repo}`, projectPath);

      if (!lastTimestamp) {
        this.log('No previous analysis found');
        return null;
      }

      const lastDate = new Date(lastTimestamp);
      if (Number.isNaN(lastDate.getTime())) {
        // An Invalid Date would make later toISOString() calls throw.
        this.log(`Ignoring unparsable last analysis timestamp: ${lastTimestamp}`, 'warn');
        return null;
      }

      this.log(`Last analysis found: ${lastTimestamp}`);
      return lastDate;
    } catch (error) {
      this.log(`Error getting last analysis date: ${error.message}`, 'warn');
      return null;
    }
  }

  /**
   * Compute the date range to fetch for an incremental update.
   *
   * @param {Date|null} lastAnalysisDate
   * @param {object} [options]
   * @param {boolean} [options.forceFullRefresh=false]
   * @param {number} [options.bufferDays=7] - Days subtracted from the last analysis date so that
   *   PRs updated shortly before it are re-examined.
   * @param {string|Date} [options.since] - Explicit lower bound; the earlier of this and the buffered date wins.
   * @param {string|Date} [options.until] - Explicit upper bound, passed through.
   * @returns {{since: (string|Date|null), until: (string|Date|null), incremental: boolean, reason: string}}
   */
  calculateIncrementalRange(lastAnalysisDate, options = {}) {
    const { forceFullRefresh = false, bufferDays = 7 } = options;

    if (forceFullRefresh || !lastAnalysisDate) {
      this.log('Using full refresh mode');
      return {
        since: options.since || null,
        until: options.until || null,
        incremental: false,
        reason: forceFullRefresh ? 'force refresh requested' : 'no previous analysis',
      };
    }

    const analysisDate = new Date(lastAnalysisDate);
    const bufferDate = new Date(analysisDate);
    bufferDate.setDate(bufferDate.getDate() - bufferDays);

    let effectiveSince = bufferDate;
    if (options.since) {
      const explicitSince = new Date(options.since);
      if (Number.isNaN(explicitSince.getTime())) {
        this.log(`Ignoring invalid since value: ${options.since}`, 'warn');
      } else if (explicitSince.getTime() < bufferDate.getTime()) {
        effectiveSince = explicitSince;
      }
    }

    this.log(`Incremental update from ${effectiveSince.toISOString()} (${bufferDays} day buffer)`);

    return {
      since: effectiveSince.toISOString(),
      until: options.until || null,
      incremental: true,
      reason: `incremental from ${analysisDate.toISOString()} with ${bufferDays} day buffer`,
    };
  }

  /**
   * Load saved progress and summarise it.
   *
   * @returns {Promise<{prs: object[], lastPage: number, lastDate: Date, totalProcessed: number}|null>}
   *   `null` when there is no usable resume data.
   */
  async resumeFromLastPosition() {
    try {
      const progress = await this.loadProgress(true);

      if (!progress.prs || progress.prs.length === 0) {
        this.log('No resume data found, starting fresh');
        return null;
      }

      // Most recent merged/updated timestamp among the saved PRs.
      const lastPRDate = progress.prs.reduce((latest, pr) => {
        const prDate = new Date(pr.merged_at || pr.updated_at);
        return prDate > latest ? prDate : latest;
      }, new Date(0));

      this.log(`Resume data found: ${progress.prs.length} PRs, last date: ${lastPRDate.toISOString()}`);

      return {
        prs: progress.prs,
        lastPage: progress.lastPage || 1,
        lastDate: lastPRDate,
        totalProcessed: progress.totalProcessed || 0,
      };
    } catch (error) {
      this.log(`Error loading resume data: ${error.message}`, 'warn');
      return null;
    }
  }

  /**
   * Select the PRs that were updated or merged after the last analysis.
   *
   * @param {string} owner - Unused; kept for interface compatibility.
   * @param {string} repo - Unused; kept for interface compatibility.
   * @param {object[]} prList
   * @param {Date|null} lastAnalysisDate
   * @returns {Promise<object[]>} The changed PRs, or `prList` unchanged when there is no baseline.
   */
  async detectChangedPRs(owner, repo, prList, lastAnalysisDate) {
    if (!lastAnalysisDate || !Array.isArray(prList)) {
      return prList; // Return all PRs if no baseline
    }

    const changedPRs = prList.filter((pr) => {
      const updatedDate = new Date(pr.updated_at);
      const mergedDate = new Date(pr.merged_at || pr.updated_at);
      return updatedDate > lastAnalysisDate || mergedDate > lastAnalysisDate;
    });

    this.log(`Delta processing: ${changedPRs.length} changed PRs out of ${prList.length} total`);
    return changedPRs;
  }

  /**
   * Fetch merged PRs of a repository page by page (closed PRs, most recently updated first).
   *
   * @param {string} owner
   * @param {string} repo
   * @param {object} [options]
   * @param {number|null} [options.limit=null] - Maximum number of PRs to return.
   * @param {boolean} [options.skipDependabot=true]
   * @param {boolean} [options.includeDrafts=false]
   * @param {boolean} [options.resume=false] - Continue from the saved progress file, if any.
   * @param {boolean} [options.incremental=false] - Derive `since` from the last analysis (needs `projectPath`).
   * @param {string|null} [options.projectPath=null]
   * @param {string|Date} [options.since] - Lower bound on `merged_at`.
   * @param {string|Date} [options.until] - Upper bound on `merged_at`.
   * @returns {Promise<object[]>} The matching PRs, without duplicates, at most `limit` of them.
   * @throws Rethrows any error from the repository access check or from a page fetch.
   */
  async fetchAllPRs(owner, repo, options = {}) {
    const { limit = null, skipDependabot = true, includeDrafts = false, resume = false, incremental = false, projectPath = null } = options;
    const repository = `${owner}/${repo}`;

    // Handle incremental updates
    let effectiveOptions = { ...options };
    if (incremental && projectPath) {
      this.log('Performing incremental analysis...');
      const lastAnalysisDate = await this.getLastAnalysisDate(owner, repo, projectPath);
      const incrementalRange = this.calculateIncrementalRange(lastAnalysisDate, options);

      effectiveOptions = {
        ...options,
        since: incrementalRange.since,
        until: incrementalRange.until,
      };

      this.log(`Incremental range: ${incrementalRange.reason}`);
    }

    // Load progress if resuming; never mix in progress saved for another repository.
    let progress = await this.loadProgress(resume);
    if (progress.repository && progress.repository !== repository) {
      this.log(`Ignoring saved progress for ${progress.repository} while fetching ${repository}`, 'warn');
      progress = {};
    }

    let allPRs = Array.isArray(progress.prs) ? [...progress.prs] : [];
    let page = progress.lastPage || 1;
    let totalProcessed = progress.totalProcessed || 0;

    // The saved page is fetched again on resume (a useful overlap, because the
    // list shifts as PRs are updated), so results are de-duplicated by PR number.
    const seenNumbers = new Set(allPRs.map((pr) => pr?.number).filter((number) => number != null));

    this.log(`Starting PR fetch for ${owner}/${repo} from page ${page}`);

    // Test repository access
    await this.testTokenPermissions(owner, repo);

    let hasMore = true;
    const startTime = Date.now();

    while (hasMore) {
      try {
        const currentPage = page;
        const response = await this.callWithRetry(() => {
          const apiParams = {
            owner,
            repo,
            state: 'closed',
            sort: 'updated',
            direction: 'desc',
            per_page: DEFAULT_PER_PAGE,
            page: currentPage,
          };

          // NOTE(review): `since` is forwarded as before, but the REST "list pull requests"
          // endpoint does not document such a parameter; the client-side filter below is
          // what actually enforces the date range.
          if (effectiveOptions.since) {
            apiParams.since = effectiveOptions.since;
          }

          return this.octokit.pulls.list(apiParams);
        });

        const fetched = response.data;
        const matching = this.filterPRs(fetched, {
          skipDependabot,
          includeDrafts,
          since: effectiveOptions.since,
          until: effectiveOptions.until,
        });

        for (const pr of matching) {
          if (pr.number != null) {
            if (seenNumbers.has(pr.number)) {
              continue;
            }
            seenNumbers.add(pr.number);
          }
          allPRs.push(pr);
        }
        totalProcessed += fetched.length; // Count all fetched PRs, not just filtered ones

        // Stopping conditions
        if (fetched.length < DEFAULT_PER_PAGE) {
          hasMore = false;
          this.log('Reached end of PR list');
        } else if (effectiveOptions.since && this.shouldStopFetching(fetched, { since: effectiveOptions.since })) {
          // Judged on the raw page: a page that merely filtered down to nothing
          // (unmerged, Dependabot, newer than `until`) must not end the fetch.
          hasMore = false;
          this.log('Reached date boundary (no PRs match criteria), stopping fetch');
        }

        // The limit applies regardless of why (or whether) the loop is ending.
        if (limit && allPRs.length >= limit) {
          allPRs = allPRs.slice(0, limit);
          hasMore = false;
          this.log(`Reached limit of ${limit} PRs`);
        }

        const elapsedTime = (Date.now() - startTime) / 1000;
        const prsPerSecond = elapsedTime > 0 ? totalProcessed / elapsedTime : 0;

        // Progress reporting - only log every 10 pages to reduce verbosity
        if (page % 10 === 0 || !hasMore) {
          this.log(
            `Page ${page}: ${allPRs.length} PRs collected (rate: ${prsPerSecond.toFixed(1)} PRs/s, elapsed: ${elapsedTime.toFixed(1)}s)`
          );
        }

        if (this.progressCallback) {
          const estimatedTotal = hasMore ? Math.ceil(allPRs.length * 1.5) : allPRs.length;

          this.progressCallback({
            type: 'pr_fetch',
            page,
            totalPRs: allPRs.length,
            processedPRs: totalProcessed,
            estimatedTotal,
            rateLimitRemaining: this.rateLimitInfo?.remaining,
            elapsedTime,
            prsPerSecond,
          });
        }

        // Save progress periodically
        if (page % PROGRESS_SAVE_INTERVAL === 0) {
          await this.saveProgress({
            prs: allPRs,
            lastPage: page,
            totalProcessed,
            repository,
            timestamp: new Date().toISOString(),
          });
        }

        page++;

        // Rate limiting
        await this.respectRateLimit();
      } catch (error) {
        this.log(`Error fetching page ${page}: ${error.message}`, 'error');
        throw error;
      }
    }

    const duration = (Date.now() - startTime) / 1000;
    this.log(`Completed PR fetch: ${allPRs.length} PRs in ${duration.toFixed(2)}s`);

    // Save final progress
    await this.saveProgress({
      prs: allPRs,
      lastPage: page,
      totalProcessed,
      repository,
      completed: true,
      timestamp: new Date().toISOString(),
    });

    return allPRs;
  }

  /**
   * Keep only merged PRs that pass the Dependabot, draft and merge-date filters.
   *
   * @param {object[]} prs
   * @param {object} options
   * @param {boolean} [options.skipDependabot]
   * @param {boolean} [options.includeDrafts]
   * @param {string|Date} [options.since] - Drop PRs merged before this date.
   * @param {string|Date} [options.until] - Drop PRs merged after this date.
   * @returns {object[]}
   */
  filterPRs(prs, options) {
    const { skipDependabot, includeDrafts, since, until } = options;
    const sinceDate = since ? new Date(since) : null;
    const untilDate = until ? new Date(until) : null;

    return prs.filter((pr) => {
      // Only merged PRs
      if (!pr.merged_at) {
        return false;
      }
      if (skipDependabot && this.isDependabotPR(pr)) {
        return false;
      }
      if (!includeDrafts && pr.draft) {
        return false;
      }

      const mergedDate = new Date(pr.merged_at);
      if (sinceDate && mergedDate < sinceDate) {
        return false;
      }
      if (untilDate && mergedDate > untilDate) {
        return false;
      }
      return true;
    });
  }

  /**
   * @param {object} pr
   * @returns {boolean} Whether the PR author is one of the Dependabot bot accounts.
   */
  isDependabotPR(pr) {
    const dependabotUsers = ['dependabot[bot]', 'dependabot-preview[bot]'];
    return dependabotUsers.includes(pr.user?.login?.toLowerCase());
  }

  /**
   * Decide whether a page of PRs lies entirely before the `since` boundary.
   *
   * PRs are listed by `updated_at` descending and a PR cannot be merged after its
   * last update, so once every PR on a page was last updated before `since`, no
   * later page can contain a PR merged on or after `since`.
   *
   * @param {object[]} prs - One raw (unfiltered) page of PRs.
   * @param {object} options
   * @param {string|Date} [options.since]
   * @returns {boolean} `true` only when `since` is set, the page is non-empty and every PR
   *   on it carries a timestamp older than `since`.
   */
  shouldStopFetching(prs, options) {
    const { since } = options;

    if (!since || prs.length === 0) {
      return false;
    }

    const sinceDate = new Date(since);
    return prs.every((pr) => {
      const stamp = pr.updated_at ?? pr.merged_at;
      if (!stamp) {
        return false; // Unknown age: keep fetching rather than risk skipping PRs.
      }
      return new Date(stamp) < sinceDate;
    });
  }

  /**
   * Collect every page of a list endpoint.
   *
   * Requests pages of `DEFAULT_PER_PAGE` items through `callWithRetry` until a
   * short page is returned.
   *
   * @param {Function} listPage - Receives the request parameters and returns the Octokit response promise.
   * @param {object} params - Endpoint parameters (without paging fields).
   * @returns {Promise<object[]>} All items, in the order returned.
   */
  async listAllPages(listPage, params) {
    const maxPages = 100; // Safety net against an endpoint or test double that never returns a short page.
    const items = [];

    for (let page = 1; page <= maxPages; page++) {
      const response = await this.callWithRetry(() => listPage({ ...params, per_page: DEFAULT_PER_PAGE, page }));
      const batch = Array.isArray(response?.data) ? response.data : [];
      items.push(...batch);

      if (batch.length < DEFAULT_PER_PAGE) {
        break;
      }
    }

    return items;
  }

  /**
   * Fetch a PR together with all of its review comments, issue comments, reviews and files.
   *
   * @param {string} owner
   * @param {string} repo
   * @param {number} prNumber
   * @returns {Promise<{pr: object, reviewComments: object[], issueComments: object[], reviews: object[], files: object[]}>}
   * @throws Rethrows the first failing request's error after logging it.
   */
  async fetchPRDetails(owner, repo, prNumber) {
    try {
      // Proactively check rate limits before making multiple parallel requests
      await this.respectRateLimit();

      const pullParams = { owner, repo, pull_number: prNumber };
      const issueParams = { owner, repo, issue_number: prNumber };

      const [prDetails, reviewComments, issueComments, reviews, files] = await Promise.all([
        this.callWithRetry(() => this.octokit.pulls.get({ ...pullParams })),
        this.listAllPages((params) => this.octokit.pulls.listReviewComments(params), pullParams),
        this.listAllPages((params) => this.octokit.issues.listComments(params), issueParams),
        this.listAllPages((params) => this.octokit.pulls.listReviews(params), pullParams),
        this.listAllPages((params) => this.octokit.pulls.listFiles(params), pullParams),
      ]);

      return {
        pr: prDetails.data,
        reviewComments,
        issueComments,
        reviews,
        files,
      };
    } catch (error) {
      this.log(`Error fetching details for PR #${prNumber}: ${error.message}`, 'error');
      throw error;
    }
  }

  /**
   * Fetch all review comments (inline code comments) of a PR.
   *
   * @param {string} owner
   * @param {string} repo
   * @param {number} prNumber
   * @returns {Promise<object[]>}
   */
  async getPRReviewComments(owner, repo, prNumber) {
    try {
      await this.respectRateLimit();
      return await this.listAllPages((params) => this.octokit.pulls.listReviewComments(params), { owner, repo, pull_number: prNumber });
    } catch (error) {
      this.log(`Error fetching review comments for PR #${prNumber}: ${error.message}`, 'error');
      throw error;
    }
  }

  /**
   * Fetch all issue comments (general discussion comments) of a PR.
   *
   * @param {string} owner
   * @param {string} repo
   * @param {number} prNumber
   * @returns {Promise<object[]>}
   */
  async getPRIssueComments(owner, repo, prNumber) {
    try {
      await this.respectRateLimit();
      return await this.listAllPages((params) => this.octokit.issues.listComments(params), { owner, repo, issue_number: prNumber });
    } catch (error) {
      this.log(`Error fetching issue comments for PR #${prNumber}: ${error.message}`, 'error');
      throw error;
    }
  }

  /**
   * Fetch all changed files of a PR.
   *
   * @param {string} owner
   * @param {string} repo
   * @param {number} prNumber
   * @returns {Promise<object[]>}
   */
  async getPRFiles(owner, repo, prNumber) {
    try {
      await this.respectRateLimit();
      return await this.listAllPages((params) => this.octokit.pulls.listFiles(params), { owner, repo, pull_number: prNumber });
    } catch (error) {
      this.log(`Error fetching files for PR #${prNumber}: ${error.message}`, 'error');
      throw error;
    }
  }

  /**
   * Run an API call, retrying retryable failures with a backoff delay.
   *
   * @param {Function} apiCall - Returns the response promise.
   * @param {number} [maxRetries=MAX_RETRIES] - Total number of attempts (at least one is always made).
   * @returns {Promise<object>} The successful response.
   * @throws The last error once attempts are exhausted or the error is not retryable.
   */
  async callWithRetry(apiCall, maxRetries = MAX_RETRIES) {
    const attempts = Math.max(1, maxRetries);
    let lastError;

    for (let attempt = 1; attempt <= attempts; attempt++) {
      try {
        const result = await apiCall();

        // Update rate limit info
        if (result?.headers) {
          this.updateRateLimitInfo(result.headers);
        }

        return result;
      } catch (error) {
        lastError = error;

        if (!this.isRetryableError(error) || attempt >= attempts) {
          break;
        }

        const delay = this.calculateBackoffDelay(attempt, error);
        this.log(`Attempt ${attempt} failed, retrying in ${delay}ms: ${error.message}`, 'warn');
        await this.sleep(delay);
      }
    }

    throw lastError;
  }

  /**
   * Whether an error is a GitHub rate-limit response: a 429, or a 403 that carries
   * `retry-after` or an exhausted `x-ratelimit-remaining` header.
   *
   * @param {object} error
   * @returns {boolean}
   */
  isRateLimitError(error) {
    if (error?.status === 429) {
      return true;
    }
    if (error?.status !== 403) {
      return false;
    }

    const headers = error.response?.headers ?? {};
    return headers['retry-after'] !== undefined || String(headers['x-ratelimit-remaining']) === '0';
  }

  /**
   * Whether a failed call is worth retrying: rate limiting, any 5xx status, or a
   * transient network error code.
   *
   * @param {object} error
   * @returns {boolean}
   */
  isRetryableError(error) {
    const transientCodes = ['ENOTFOUND', 'ECONNRESET', 'ETIMEDOUT'];
    return this.isRateLimitError(error) || error.status >= 500 || transientCodes.includes(error.code);
  }

  /**
   * Delay before the next retry.
   *
   * Rate-limit errors wait for `retry-after` (seconds) or until `x-ratelimit-reset`,
   * clamped between `BASE_RETRY_DELAY` and one minute. Everything else uses
   * exponential backoff starting at `BASE_RETRY_DELAY`.
   *
   * @param {number} attempt - 1-based number of the attempt that just failed.
   * @param {object} error
   * @returns {number} Milliseconds to wait.
   */
  calculateBackoffDelay(attempt, error) {
    const maxRateLimitDelay = 60000; // Cap at 1 minute

    if (this.isRateLimitError(error)) {
      const headers = error.response?.headers ?? {};

      const retryAfterSeconds = Number.parseInt(headers['retry-after'], 10);
      if (!Number.isNaN(retryAfterSeconds) && retryAfterSeconds >= 0) {
        return Math.min(Math.max(retryAfterSeconds * 1000, BASE_RETRY_DELAY), maxRateLimitDelay);
      }

      const resetSeconds = Number.parseInt(headers['x-ratelimit-reset'], 10);
      if (!Number.isNaN(resetSeconds)) {
        const untilReset = Math.max(resetSeconds * 1000 - Date.now(), BASE_RETRY_DELAY);
        return Math.min(untilReset, maxRateLimitDelay);
      }
    }

    // Exponential backoff for other errors
    return BASE_RETRY_DELAY * Math.pow(2, attempt - 1);
  }

  /**
   * Record rate-limit state from response headers.
   *
   * Responses without an `x-ratelimit-remaining` header leave the previous state
   * untouched; treating them as "0 remaining" would make `respectRateLimit` sleep
   * before every subsequent call.
   *
   * @param {object} headers
   */
  updateRateLimitInfo(headers) {
    const readInt = (name) => {
      const value = Number.parseInt(headers?.[name], 10);
      return Number.isNaN(value) ? null : value;
    };

    const remaining = readInt('x-ratelimit-remaining');
    if (remaining === null) {
      return;
    }

    this.rateLimitInfo = {
      limit: readInt('x-ratelimit-limit') ?? 0,
      remaining,
      reset: readInt('x-ratelimit-reset') ?? 0,
      used: readInt('x-ratelimit-used') ?? 0,
    };
  }

  /**
   * Pause until the rate limit resets (at least one second) when fewer than
   * `RATE_LIMIT_BUFFER` requests remain. Does nothing while no rate-limit data is known.
   *
   * @returns {Promise<void>}
   */
  async respectRateLimit() {
    if (!this.rateLimitInfo) {
      return;
    }

    const { remaining, reset } = this.rateLimitInfo;
    if (remaining >= RATE_LIMIT_BUFFER) {
      return;
    }

    const delay = Math.max(reset * 1000 - Date.now(), 1000);
    this.log(`Rate limit low (${remaining} remaining), waiting ${delay}ms`, 'warn');
    await this.sleep(delay);
  }

  /**
   * Load saved progress from the resume file.
   *
   * @param {boolean} resume - When false (or no resume file is configured) nothing is read.
   * @returns {Promise<object>} The saved progress object, or `{}` when unavailable or unreadable.
   */
  async loadProgress(resume) {
    if (!resume || !this.resumeFile) {
      return {};
    }

    try {
      const progressData = await fs.readFile(this.resumeFile, 'utf8');
      const progress = JSON.parse(progressData);

      if (progress === null || typeof progress !== 'object' || Array.isArray(progress)) {
        throw new Error('progress file does not contain a JSON object');
      }

      this.log(`Resuming from page ${progress.lastPage || 1}`);
      return progress;
    } catch (error) {
      if (error?.code === 'ENOENT') {
        this.log('No previous progress found, starting fresh');
      } else {
        // Corrupt or unreadable file: still start fresh, but say why.
        this.log(`Could not load previous progress (${error?.message}), starting fresh`, 'warn');
      }
      return {};
    }
  }

  /**
   * Write progress to the resume file (best effort: failures are logged, not thrown).
   *
   * @param {object} progress
   * @returns {Promise<void>}
   */
  async saveProgress(progress) {
    if (!this.resumeFile) {
      return;
    }

    try {
      await fs.mkdir(path.dirname(this.resumeFile), { recursive: true });
      await fs.writeFile(this.resumeFile, JSON.stringify(progress, null, 2));
    } catch (error) {
      this.log(`Failed to save progress: ${error.message}`, 'error');
    }
  }

  /**
   * @param {number} ms
   * @returns {Promise<void>} Resolves after `ms` milliseconds.
   */
  async sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Print a colourised, prefixed message. `debug` messages are dropped unless
   * the client was created with `debug: true`.
   *
   * @param {string} message
   * @param {'info'|'warn'|'error'|'debug'} [level='info']
   */
  log(message, level = 'info') {
    if (!this.debug && level === 'debug') {
      return;
    }

    const colors = {
      info: chalk.blue,
      warn: chalk.yellow,
      error: chalk.red,
      debug: chalk.cyan,
    };

    const colorFn = colors[level] || chalk.white;
    console.log(colorFn(`[GitHub Client] ${message}`));
  }
}