@phystack/prdl 4.4.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/index.js +959 -0
  2. package/package.json +23 -0
package/index.js ADDED
@@ -0,0 +1,959 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { Octokit } from '@octokit/rest';
4
+ import fs from 'fs';
5
+ import path from 'path';
6
+ import { execSync } from 'child_process';
7
+ import { fileURLToPath } from 'url';
8
+
9
// Get current directory in ESM (import.meta.url -> filesystem path)
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Configuration
const OUTPUT_FILE = 'pr-comments.json'; // result file, written to the current working directory
const MAX_CONCURRENT_REQUESTS = 5; // Max number of concurrent API requests
const BACKUP_INTERVAL = 60000; // Save backup every minute (60 seconds)

// Track processed data globally for backup
let globalComments = []; // latest snapshot of downloaded comments, used by periodic/partial backups
let isShuttingDown = false; // set once a termination signal handler has started running
21
+
22
+ // Function to save the current comments to a backup file
23
// Persist the collected comments to disk as pretty-printed JSON.
// When `isFinal` is false the data goes next to the real output file with a
// `.backup` suffix and a progress message is logged; when true it overwrites
// `outputPath` itself. Write failures are logged but never thrown — a failed
// backup must not kill the crawl.
function saveBackup(comments, outputPath, isFinal = false) {
  const destination = isFinal ? outputPath : `${outputPath}.backup`;
  try {
    const payload = JSON.stringify(comments, null, 2);
    fs.writeFileSync(destination, payload);
    if (isFinal) return;
    console.log(`Backup saved to ${destination} with ${comments.length} comments`);
  } catch (error) {
    console.error(`Error saving backup: ${error.message}`);
  }
}
34
+
35
+ // Setup signal handlers for graceful shutdown
36
// Install SIGINT/SIGTERM/SIGHUP handlers that dump whatever has been
// collected so far to `<outputPath>.partial` before exiting, so an
// interrupted run is not a total loss.
function setupSignalHandlers(outputPath) {
  const onTerminationSignal = (signal) => {
    // A second signal while we are already saving is ignored.
    if (isShuttingDown) return;
    isShuttingDown = true;

    console.log(`\nReceived ${signal}. Saving partial results before exiting...`);

    const partialPath = `${outputPath}.partial`;
    saveBackup(globalComments, partialPath, true);
    console.log(`Saved ${globalComments.length} comments to ${partialPath}`);
    console.log('You can resume later by using these partial results.');

    process.exit(0);
  };

  for (const signal of ['SIGINT', 'SIGTERM', 'SIGHUP']) {
    process.on(signal, () => onTerminationSignal(signal));
  }
}
57
+
58
+ // Display usage information
59
// Print the CLI help text and terminate the process successfully.
function showUsage() {
  const helpLines = [
    'PR Comments Downloader',
    '---------------------',
    'A tool to download all PR comments from a GitHub repository',
    '\nUsage:',
    ' node pr-comments-downloader.js [repository_url] [pr_limit]',
    '\nExamples:',
    ' node pr-comments-downloader.js https://github.com/owner/repo 50',
    ' node pr-comments-downloader.js git@github.com:owner/repo.git',
    ' node pr-comments-downloader.js',
    '\nParameters:',
    ' repository_url: GitHub repository URL (optional if running in a git repository)',
    ' pr_limit: Maximum number of PRs to process, sorted by creation date (newest first)',
    ' If not specified, all PRs will be processed',
    '\nRequirements:',
    ' - GITHUB_TOKEN environment variable must be set with a valid GitHub personal access token',
  ];
  for (const line of helpLines) {
    console.log(line);
  }
  process.exit(0);
}
77
+
78
+ // Simple queue implementation
79
// Bounded-concurrency queue for API requests.
//
// Fixes over the previous implementation:
// - the per-item timeout timer is now cleared once the request settles, so a
//   fast request no longer leaves a live timer holding the event loop open
//   for up to `timeoutMs`;
// - the redundant second `this.process()` call after the try/finally is gone
//   (the `finally` dispatch alone frees the slot and pulls the next item).
class RequestQueue {
  constructor(maxConcurrent = 5) {
    this.queue = []; // pending items: { fn, resolve, reject, description, timeoutMs }
    this.active = 0; // requests currently in flight
    this.maxConcurrent = maxConcurrent;
    this.completed = 0; // requests that finished successfully
    this.total = 0; // requests ever enqueued
    this.failed = 0; // requests that threw or timed out
  }

  /**
   * Enqueue an async job.
   * Resolves with the job's result, or with `null` if the job fails or times
   * out — it never rejects, so `Promise.all` over added jobs cannot fail fast.
   * @param {() => Promise<*>} promiseFn job to run when a slot is free
   * @param {string} description label used in progress/error logs
   * @param {number} timeoutMs hard cap on the job's runtime
   */
  add(promiseFn, description = '', timeoutMs = 30000) {
    return new Promise((resolve, reject) => {
      this.queue.push({
        fn: promiseFn,
        resolve,
        reject,
        description,
        timeoutMs
      });
      this.total++;
      this.process();
    });
  }

  // Dispatch the next queued item if a concurrency slot is free.
  async process() {
    if (this.active >= this.maxConcurrent || this.queue.length === 0) {
      return;
    }

    const item = this.queue.shift();
    this.active++;

    // Race the job against a timeout so a hung request cannot stall the queue.
    let timerId;
    const timeoutPromise = new Promise((_, reject) => {
      timerId = setTimeout(() => {
        reject(new Error(`Request timed out after ${item.timeoutMs}ms: ${item.description}`));
      }, item.timeoutMs);
    });

    try {
      const result = await Promise.race([item.fn(), timeoutPromise]);
      this.completed++;
      if (this.completed % 10 === 0 || this.completed === this.total) {
        console.log(`Progress: ${this.completed}/${this.total} requests completed (${Math.round(this.completed/this.total*100)}%), ${this.failed} failed`);
      }
      item.resolve(result);
    } catch (error) {
      this.failed++;
      console.warn(`Request failed: ${item.description} - ${error.message}`);
      // Resolve with null rather than rejecting to avoid Promise.all failing
      item.resolve(null);
    } finally {
      clearTimeout(timerId); // release the timer as soon as the race settles
      this.active--;
      this.process();
    }
  }
}
140
+
141
+ // Check if help was requested
142
// Print usage and exit immediately when help is requested on the command line.
const wantsHelp = ['--help', '-h'].some((flag) => process.argv.includes(flag));
if (wantsHelp) {
  showUsage();
}
145
+
146
/**
 * Parse "owner" and "repo" out of a GitHub remote URL (https or ssh form).
 * Returns { owner, repo } or null when the URL is not recognized.
 * Fix: the previous regex used `([^/\.]+)` for the repo segment, which made
 * any repository whose name contains a dot (e.g. "next.js") unparseable.
 * Now the repo part may contain dots; only a trailing ".git" is stripped.
 */
function parseGitHubRepo(remoteUrl) {
  if (!remoteUrl.includes('github.com')) return null;
  const match = remoteUrl.match(/github\.com[:/]([^/]+)\/(.+?)(?:\.git)?\/?$/);
  if (!match) return null;
  return { owner: match[1], repo: match[2] };
}

// Print a compact summary of the errors collected during the run.
function reportErrors(errorStats) {
  console.log('\nSummary of errors encountered:');
  console.log(`- Total errors: ${errorStats.totalErrors}`);
  console.log(`- Files not found: ${errorStats.notFoundFiles.size}`);

  if (errorStats.notFoundFiles.size > 0) {
    console.log('\nList of files that could not be found:');
    Array.from(errorStats.notFoundFiles).slice(0, 10).forEach(file => {
      console.log(`- ${file}`);
    });
    if (errorStats.notFoundFiles.size > 10) {
      console.log(`...and ${errorStats.notFoundFiles.size - 10} more`);
    }
  }

  if (errorStats.failedPRs.size > 0) {
    console.log('\nPRs with errors:');
    Array.from(errorStats.failedPRs).slice(0, 10).forEach(pr => {
      console.log(`- PR #${pr}`);
    });
    if (errorStats.failedPRs.size > 10) {
      console.log(`...and ${errorStats.failedPRs.size - 10} more`);
    }
  }
}

/**
 * Entry point: resolve the target repository, fetch its pull requests,
 * download every comment (with periodic backups), write the result to
 * pr-comments.json in the current directory, and print an error summary.
 * Exits the process with status 1 on fatal configuration or API errors.
 */
async function main() {
  // Shared accumulator for errors reported at the end of the run.
  const errorStats = {
    notFoundFiles: new Set(), // file paths that could not be fetched
    failedPRs: new Set(),     // PR numbers that hit at least one error
    totalErrors: 0
  };

  const outputPath = path.join(process.cwd(), OUTPUT_FILE);

  // Save partial results if the user interrupts the run.
  setupSignalHandlers(outputPath);

  try {
    const token = process.env.GITHUB_TOKEN;
    if (!token) {
      console.error('Error: GITHUB_TOKEN environment variable not set');
      console.error('Please set it with: export GITHUB_TOKEN=your_github_token');
      process.exit(1);
    }

    // NOTE(review): these throttle callbacks only take effect when the
    // octokit throttling plugin is installed; plain @octokit/rest ignores
    // the `throttle` option — confirm the intended client.
    const octokit = new Octokit({
      auth: token,
      throttle: {
        onRateLimit: (retryAfter, options) => {
          console.warn(`Rate limit hit for request ${options.method} ${options.url}, retrying after ${retryAfter} seconds`);
          return true; // Always retry
        },
        onSecondaryRateLimit: (retryAfter, options) => {
          console.warn(`Secondary rate limit hit for request ${options.method} ${options.url}, retrying after ${retryAfter} seconds`);
          return true; // Always retry
        },
      }
    });

    console.log('Detecting repository information...');
    let remoteUrl;
    let prLimit = null;

    // argv[2] is either a repository URL (contains "github.com" or ":")
    // or a numeric PR limit; argv[3] may additionally carry the limit.
    if (process.argv.length > 2) {
      if (process.argv[2].includes('github.com') || process.argv[2].includes(':')) {
        remoteUrl = process.argv[2];
        console.log(`Using provided repository URL: ${remoteUrl}`);

        if (process.argv.length > 3) {
          const limitArg = parseInt(process.argv[3], 10);
          if (!isNaN(limitArg) && limitArg > 0) {
            prLimit = limitArg;
          }
        }
      } else {
        const limitArg = parseInt(process.argv[2], 10);
        if (!isNaN(limitArg) && limitArg > 0) {
          prLimit = limitArg;
        }
      }
    }

    if (prLimit !== null) {
      console.log(`Processing the ${prLimit} most recent PRs`);
    } else {
      console.log('Processing all PRs');
    }

    // Fall back to the current repository's origin remote.
    if (!remoteUrl) {
      try {
        remoteUrl = execSync('git config --get remote.origin.url').toString().trim();
        console.log(`Using git config repository URL: ${remoteUrl}`);
      } catch (error) {
        console.error('Error: Could not determine repository from git config');
        console.error('Please provide a GitHub repository URL as argument:');
        console.error(' node pr-comments-downloader.js https://github.com/owner/repo');
        process.exit(1);
      }
    }

    const parsed = parseGitHubRepo(remoteUrl);
    if (!parsed) {
      console.error('Error: Could not determine repository owner and name');
      console.error('Repository URL:', remoteUrl);
      console.error('Please provide a valid GitHub repository URL in the format:');
      console.error(' https://github.com/owner/repo');
      console.error(' git@github.com:owner/repo.git');
      process.exit(1);
    }
    const { owner, repo } = parsed;

    console.log(`Repository: ${owner}/${repo}`);

    console.log('Fetching pull requests...');
    const allPRs = await getPullRequests(octokit, owner, repo);
    console.log(`Found ${allPRs.length} pull requests`);

    // Newest first, then optionally truncate to the requested limit.
    allPRs.sort((a, b) => new Date(b.created_at) - new Date(a.created_at));
    const prsToProcess = prLimit ? allPRs.slice(0, prLimit) : allPRs;
    console.log(`Processing ${prsToProcess.length} pull requests`);

    // Periodically snapshot progress so an interrupted run loses little work.
    const backupIntervalId = setInterval(() => {
      if (globalComments.length > 0) {
        saveBackup(globalComments, outputPath);
      }
    }, BACKUP_INTERVAL);

    console.log('Fetching comments for each pull request (in parallel)...');
    const commentData = await getAllCommentsParallel(octokit, owner, repo, prsToProcess, errorStats);

    // Keep the snapshot current in case a signal lands before we finish.
    globalComments = commentData;

    clearInterval(backupIntervalId);

    saveBackup(commentData, outputPath, true);
    console.log(`Successfully saved ${commentData.length} PR comments to ${outputPath}`);

    if (errorStats.totalErrors > 0) {
      reportErrors(errorStats);
    }
  } catch (error) {
    console.error('Error:', error.message);
    process.exit(1);
  }
}
320
+
321
// Fetch every pull request (open, closed, and merged) for the repository,
// walking the paginated list endpoint 100 PRs at a time until an empty
// page signals the end.
async function getPullRequests(octokit, owner, repo) {
  const collected = [];
  for (let page = 1; ; page++) {
    const { data } = await octokit.pulls.list({
      owner,
      repo,
      state: 'all',
      per_page: 100,
      page,
    });
    if (data.length === 0) {
      return collected;
    }
    collected.push(...data);
  }
}
345
+
346
// Download comments for all PRs with at most MAX_CONCURRENT_REQUESTS API
// calls in flight, working through the PR list in batches of 10 so memory
// stays bounded and `globalComments` can be refreshed between batches for
// the periodic backup timer.
async function getAllCommentsParallel(octokit, owner, repo, pullRequests, errorStats) {
  const requestQueue = new RequestQueue(MAX_CONCURRENT_REQUESTS);
  const allComments = [];
  const totalPRs = pullRequests.length;
  const BATCH_SIZE = 10;
  let processedCount = 0;

  for (let batchStart = 0; batchStart < totalPRs; batchStart += BATCH_SIZE) {
    const batchEnd = Math.min(batchStart + BATCH_SIZE, totalPRs);
    const batch = pullRequests.slice(batchStart, batchEnd);
    console.log(`Processing PR batch ${batchStart+1}-${batchEnd} of ${pullRequests.length}`);

    // Queue every PR of the batch; each job resolves with the PR's comments
    // (or [] on failure — failures are recorded, never propagated).
    const pendingBatch = batch.map((pr, offset) =>
      requestQueue.add(
        async () => {
          console.log(`Processing PR #${pr.number} (${batchStart + offset + 1}/${totalPRs})`);
          try {
            const comments = await processOnePR(octokit, owner, repo, pr, errorStats, requestQueue);
            processedCount++;
            console.log(`Completed PR #${pr.number} (${processedCount}/${totalPRs})`);
            return comments;
          } catch (error) {
            console.error(`Error processing PR #${pr.number}: ${error.message}`);
            errorStats.totalErrors++;
            errorStats.failedPRs.add(pr.number);
            processedCount++;
            console.log(`Failed PR #${pr.number} (${processedCount}/${totalPRs})`);
            return [];
          }
        },
        `PR #${pr.number}`,
        60000 // 60 second timeout for each PR
      )
    );

    console.log(`Waiting for ${pendingBatch.length} PRs in batch to complete processing...`);
    const settled = await Promise.all(pendingBatch);

    // Timed-out/failed jobs resolve to null; keep only real comment arrays.
    const batchComments = settled
      .filter((value) => Array.isArray(value))
      .flat();

    allComments.push(...batchComments);

    // Refresh the snapshot used by the backup timer and signal handlers.
    globalComments = [...allComments];

    console.log(`Batch ${batchStart+1}-${batchEnd} completed with ${batchComments.length} comments. Total: ${allComments.length}`);
  }

  return allComments;
}
411
+
412
// Build the serializable record for one top-level review-comment thread.
// Shared by the success and failure paths of the code-context fetch, which
// previously duplicated this ~30-line literal and risked drifting apart.
function buildReviewCommentRecord(pr, comment, thread, codeContext) {
  const replies = thread.replies.map(reply => ({
    id: reply.id,
    author: reply.user?.login || 'unknown',
    body: reply.body || '',
    created_at: reply.created_at,
    html_url: reply.html_url
  }));

  return {
    type: 'review_comment',
    pr_number: pr.number,
    pr_title: pr.title,
    pr_created_at: pr.created_at,
    author: comment.user?.login || 'unknown',
    body: comment.body || '',
    created_at: comment.created_at,
    path: comment.path,
    commit_id: comment.commit_id,
    line: comment.line,
    original_line: comment.original_line,
    position: comment.position,
    original_position: comment.original_position,
    diff_hunk: comment.diff_hunk,
    html_url: comment.html_url,
    code_context: codeContext,
    discussion: {
      replies: replies,
      count: replies.length
    }
  };
}

/**
 * Collect every kind of comment attached to one pull request:
 * - inline review comments, organized into threads and enriched with code context;
 * - issue (conversation-tab) comments, grouped into conversations heuristically;
 * - reviews, each with its associated inline comments.
 * Errors are recorded in `errorStats` and never propagate; the function
 * always returns the (possibly partial) array of comment records.
 *
 * Fix: the previous failure check inspected Promise.allSettled results for
 * 'rejected' entries, but every fetch already had a .catch returning [], so
 * the check was dead code. A flag set inside the .catch handlers now detects
 * partial fetch failures.
 */
async function processOnePR(octokit, owner, repo, pr, errorStats, requestQueue) {
  const prComments = [];
  const commentThreads = new Map(); // top-level comment id -> { parent, replies }

  try {
    // Fetch the three comment sources in parallel. Each fetch converts its
    // own failure into an empty list so the others can still be used.
    let anyFetchFailed = false;
    const [reviewComments, issueComments, reviews] = await Promise.all([
      getReviewComments(octokit, owner, repo, pr.number).catch(e => {
        console.warn(`Failed to get review comments for PR #${pr.number}: ${e.message}`);
        anyFetchFailed = true;
        return [];
      }),
      getIssueComments(octokit, owner, repo, pr.number).catch(e => {
        console.warn(`Failed to get issue comments for PR #${pr.number}: ${e.message}`);
        anyFetchFailed = true;
        return [];
      }),
      getReviews(octokit, owner, repo, pr.number).catch(e => {
        console.warn(`Failed to get reviews for PR #${pr.number}: ${e.message}`);
        anyFetchFailed = true;
        return [];
      })
    ]);

    if (anyFetchFailed) {
      errorStats.failedPRs.add(pr.number);
      errorStats.totalErrors++;
    }

    console.log(`PR #${pr.number}: Found ${reviewComments.length} review comments, ${issueComments.length} issue comments, ${reviews.length} reviews`);

    // Organize the review comments into threads keyed by top-level comment id.
    reviewComments.forEach(comment => {
      if (comment.in_reply_to_id) {
        // Reply to another comment: attach to (or create) its thread.
        if (!commentThreads.has(comment.in_reply_to_id)) {
          commentThreads.set(comment.in_reply_to_id, {
            parent: null,
            replies: []
          });
        }
        commentThreads.get(comment.in_reply_to_id).replies.push(comment);
      } else {
        // Top-level comment: create its thread, or fill in the parent if a
        // reply arrived first and created the thread already.
        if (!commentThreads.has(comment.id)) {
          commentThreads.set(comment.id, {
            parent: comment,
            replies: []
          });
        } else {
          commentThreads.get(comment.id).parent = comment;
        }
      }
    });

    // Enrich each top-level review comment with code context, in parallel
    // through the shared request queue.
    if (reviewComments.length > 0) {
      const parentCommentPromises = Array.from(commentThreads.values())
        .filter(thread => thread.parent !== null)
        .map(thread => {
          const comment = thread.parent;
          return requestQueue.add(
            async () => {
              try {
                let codeContext = null;
                if (comment.path && (comment.line || comment.position || comment.original_position)) {
                  codeContext = await getCodeContext(octokit, owner, repo, pr.number, comment, errorStats);
                }
                return buildReviewCommentRecord(pr, comment, thread, codeContext);
              } catch (error) {
                console.warn(`Couldn't fetch code context for comment in PR #${pr.number}: ${error.message}`);
                errorStats.totalErrors++;
                errorStats.failedPRs.add(pr.number);
                if (comment.path) {
                  errorStats.notFoundFiles.add(comment.path);
                }
                // Still keep the comment, just without code context.
                return buildReviewCommentRecord(pr, comment, thread, null);
              }
            },
            `CodeContext PR #${pr.number} comment ${comment.id}`,
            45000 // 45 second timeout for each code context
          );
        });

      const reviewCommentsResults = await Promise.allSettled(parentCommentPromises);
      reviewCommentsResults
        .filter(result => result.status === 'fulfilled' && result.value !== null)
        .forEach(result => prComments.push(result.value));
    }

    // Group issue comments into conversations by time proximity. This is
    // heuristic-based since GitHub doesn't thread issue comments: consecutive
    // comments less than 30 minutes apart count as one conversation.
    if (issueComments.length > 0) {
      const sortedIssueComments = [...issueComments].sort((a, b) =>
        new Date(a.created_at) - new Date(b.created_at)
      );

      const THIRTY_MINUTES_MS = 30 * 60 * 1000;
      const issueConversations = [];
      let currentConversation = [];

      for (const comment of sortedIssueComments) {
        if (currentConversation.length === 0) {
          currentConversation.push(comment);
        } else {
          const lastComment = currentConversation[currentConversation.length - 1];
          const timeDiff = new Date(comment.created_at) - new Date(lastComment.created_at);
          if (timeDiff < THIRTY_MINUTES_MS) {
            currentConversation.push(comment);
          } else {
            issueConversations.push([...currentConversation]);
            currentConversation = [comment];
          }
        }
      }
      if (currentConversation.length > 0) {
        issueConversations.push(currentConversation);
      }

      // Emit one record per conversation: first comment + the rest as replies.
      issueConversations.forEach(conversation => {
        const firstComment = conversation[0];
        const replies = conversation.slice(1);

        prComments.push({
          type: 'issue_comment',
          pr_number: pr.number,
          pr_title: pr.title,
          pr_created_at: pr.created_at,
          author: firstComment.user?.login || 'unknown',
          body: firstComment.body || '',
          created_at: firstComment.created_at,
          html_url: firstComment.html_url,
          discussion: {
            replies: replies.map(reply => ({
              id: reply.id,
              author: reply.user?.login || 'unknown',
              body: reply.body || '',
              created_at: reply.created_at,
              html_url: reply.html_url
            })),
            count: replies.length
          }
        });
      });
    }

    // Attach inline comments to their parent reviews and emit one record per
    // review that has either a body or at least one inline comment.
    const reviewsWithComments = new Map();
    reviews.forEach(review => {
      if (!reviewsWithComments.has(review.id)) {
        reviewsWithComments.set(review.id, {
          review: review,
          comments: []
        });
      }
    });

    reviewComments.forEach(comment => {
      if (comment.pull_request_review_id && reviewsWithComments.has(comment.pull_request_review_id)) {
        reviewsWithComments.get(comment.pull_request_review_id).comments.push(comment);
      }
    });

    for (const [reviewId, reviewData] of reviewsWithComments.entries()) {
      const review = reviewData.review;
      const comments = reviewData.comments;

      if (review.body?.trim() || comments.length > 0) {
        prComments.push({
          type: 'review',
          pr_number: pr.number,
          pr_title: pr.title,
          pr_created_at: pr.created_at,
          review_id: review.id,
          author: review.user?.login || 'unknown',
          body: review.body || '',
          state: review.state,
          created_at: review.submitted_at,
          html_url: review.html_url,
          comments: comments.map(comment => ({
            id: comment.id,
            path: comment.path,
            position: comment.position,
            body: comment.body,
            diff_hunk: comment.diff_hunk
          })),
          comments_count: comments.length
        });
      }
    }
  } catch (error) {
    console.error(`Error processing PR #${pr.number}: ${error.message}`);
    errorStats.totalErrors++;
    errorStats.failedPRs.add(pr.number);
  }

  return prComments;
}
693
+
694
// Slice out ±5 lines of context around `lineNum` from a file's content.
// Previously this logic was duplicated (with an inconsistent bounds guard)
// for the original and merged snapshots; it is now shared.
function extractContextLines(fileContent, lineNum) {
  const lines = fileContent.split('\n');
  const startLine = Math.max(1, lineNum - 5);
  const endLine = Math.min(lines.length, lineNum + 5);

  const contextLines = {};
  for (let i = startLine; i <= endLine; i++) {
    contextLines[i] = lines[i - 1]; // -1 because array is 0-indexed
  }

  return {
    startLine,
    endLine,
    lines: contextLines,
    highlightedLine: lineNum
  };
}

/**
 * Fetch the code surrounding a review comment in two snapshots:
 * - `original`: the file as it was at the commit the comment targets;
 * - `merged`: the file at the PR's merge commit (if the PR was merged).
 * Every failure is recorded in `errorStats` and turned into an explanatory
 * object on the result — this function never throws to its caller.
 */
async function getCodeContext(octokit, owner, repo, pullNumber, comment, errorStats) {
  const result = {
    commit_id: comment.commit_id,
    original: null,
    merged: null
  };

  // 1. Get original file content at the comment's commit.
  try {
    try {
      const response = await octokit.repos.getContent({
        owner,
        repo,
        path: comment.path,
        ref: comment.commit_id // This is the commit the comment was made on
      });

      if (response.data.encoding === 'base64') {
        const originalFileContent = Buffer.from(response.data.content, 'base64').toString();
        const lineNum = comment.line || comment.position || 1;
        result.original = extractContextLines(originalFileContent, lineNum);
      }
    } catch (getFileError) {
      // File might not exist at that commit, or the file was renamed/moved.
      if (getFileError.status === 404) {
        if (comment.path) {
          errorStats.notFoundFiles.add(comment.path);
        }
        errorStats.totalErrors++;
      }

      // Fall back to the diff hunk embedded in the comment itself.
      if (comment.diff_hunk) {
        result.original = {
          source: "diff_hunk",
          diff_hunk: comment.diff_hunk
        };
      } else {
        throw new Error(`File not found at commit ${comment.commit_id}`);
      }
    }
  } catch (error) {
    // Don't log 404 errors as they're expected in many cases.
    if (!error.message.includes('404')) {
      console.warn(`Couldn't get original file content for ${comment.path} at commit ${comment.commit_id}: ${error.message}`);
    }

    errorStats.totalErrors++;

    result.original = {
      error: "Couldn't retrieve original file state",
      reason: error.message,
      // Include diff hunk if available
      diff_hunk: comment.diff_hunk || null
    };
  }

  // 2. Try to get the merged state (final version after PR was merged).
  try {
    const prInfo = await octokit.pulls.get({
      owner,
      repo,
      pull_number: pullNumber
    });

    if (prInfo.data.merged) {
      const mergeCommit = prInfo.data.merge_commit_sha;

      try {
        const response = await octokit.repos.getContent({
          owner,
          repo,
          path: comment.path,
          ref: mergeCommit // The merge commit
        });

        if (response.data.encoding === 'base64') {
          const mergedFileContent = Buffer.from(response.data.content, 'base64').toString();
          // Approximate: line numbers may have shifted between the comment's
          // commit and the merge commit; reuse the same line number.
          const lineNum = comment.line || comment.position || 1;
          result.merged = {
            merge_commit_sha: mergeCommit,
            ...extractContextLines(mergedFileContent, lineNum)
          };
        }
      } catch (contentError) {
        if (!contentError.message.includes('404')) {
          console.warn(`Couldn't get merged file content for ${comment.path} at commit ${mergeCommit}: ${contentError.message}`);
        }

        if (contentError.status === 404) {
          if (comment.path) {
            errorStats.notFoundFiles.add(comment.path);
          }
          errorStats.totalErrors++;
        }

        // The file might have been deleted or moved in the final version.
        result.merged = {
          error: "Couldn't retrieve merged file state",
          reason: contentError.message,
          merge_commit_sha: mergeCommit
        };
      }
    } else if (prInfo.data.state === 'closed' && !prInfo.data.merged) {
      result.merged = {
        state: "PR was closed without merging"
      };
    } else {
      result.merged = {
        state: "PR is still open"
      };
    }
  } catch (prError) {
    if (!prError.message.includes('404')) {
      console.warn(`Couldn't retrieve PR merge information for PR #${pullNumber}: ${prError.message}`);
    }

    errorStats.totalErrors++;

    result.merged = {
      error: "Couldn't retrieve PR merge information",
      reason: prError.message
    };
  }

  return result;
}
870
+
871
// Fetch all inline review comments for a PR, paginated 100 at a time.
// Fix: the previous "enrichment" loop spread each comment and reassigned
// original_position/original_line/position/line/diff_hunk to their own
// values — a no-op copy — so the comments are now collected directly.
async function getReviewComments(octokit, owner, repo, pullNumber) {
  const comments = [];
  let page = 1;
  let hasNextPage = true;

  while (hasNextPage) {
    const response = await octokit.pulls.listReviewComments({
      owner,
      repo,
      pull_number: pullNumber,
      per_page: 100,
      page,
    });

    if (response.data.length === 0) {
      hasNextPage = false;
    } else {
      comments.push(...response.data);
      page++;
    }
  }

  return comments;
}
908
+
909
// Fetch all issue-style (conversation tab) comments for a PR/issue,
// walking the paginated API 100 comments at a time until an empty page.
async function getIssueComments(octokit, owner, repo, issueNumber) {
  const collected = [];
  for (let page = 1; ; page++) {
    const response = await octokit.issues.listComments({
      owner,
      repo,
      issue_number: issueNumber,
      per_page: 100,
      page,
    });
    if (response.data.length === 0) {
      return collected;
    }
    collected.push(...response.data);
  }
}
933
+
934
// Fetch all submitted reviews for a PR, 100 per page, until an empty page
// marks the end of the list.
async function getReviews(octokit, owner, repo, pullNumber) {
  const collected = [];
  let page = 1;
  while (true) {
    const { data } = await octokit.pulls.listReviews({
      owner,
      repo,
      pull_number: pullNumber,
      per_page: 100,
      page,
    });
    if (data.length === 0) {
      break;
    }
    collected.push(...data);
    page += 1;
  }
  return collected;
}
958
+
959
// Entry point. `main` catches its own expected failures, but anything thrown
// before its try block (or any escaped rejection) would previously become an
// unhandled promise rejection; catch it here so the process exits cleanly.
main().catch((error) => {
  console.error('Error:', error.message);
  process.exit(1);
});
package/package.json ADDED
@@ -0,0 +1,23 @@
1
+ {
2
+ "name": "@phystack/prdl",
3
+ "version": "4.4.29",
4
+ "main": "index.js",
5
+ "type": "module",
6
+ "bin": {
7
+ "prdl": "./index.js"
8
+ },
9
+ "scripts": {
10
+ "test": "echo \"Error: no test specified\" && exit 1",
11
+ "build": "true"
12
+ },
13
+ "publishConfig": {
14
+ "access": "public"
15
+ },
16
+ "author": "",
17
+ "license": "ISC",
18
+ "description": "",
19
+ "dependencies": {
20
+ "@octokit/rest": "^21.1.1"
21
+ },
22
+ "gitHead": "7dba834ca1f446f669992ff14352862d2fbcfe22"
23
+ }