PR2MD 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pr2md/__init__.py +21 -0
- pr2md/__main__.py +6 -0
- pr2md/cli.py +236 -0
- pr2md/formatter.py +231 -0
- pr2md/models.py +225 -0
- pr2md/pr_extractor.py +190 -0
- pr2md/py.typed +0 -0
- pr2md-1.0.1.dist-info/METADATA +317 -0
- pr2md-1.0.1.dist-info/RECORD +13 -0
- pr2md-1.0.1.dist-info/WHEEL +5 -0
- pr2md-1.0.1.dist-info/entry_points.txt +2 -0
- pr2md-1.0.1.dist-info/licenses/LICENSE.md +70 -0
- pr2md-1.0.1.dist-info/top_level.txt +1 -0
pr2md/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""PR2MD - Pull Request to Markdown Exporter.
|
|
2
|
+
|
|
3
|
+
A tool for extracting GitHub Pull Request data and formatting it as Markdown.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from pr2md.formatter import MarkdownFormatter
|
|
7
|
+
from pr2md.models import Comment, Label, PullRequest, Review, ReviewComment, User
|
|
8
|
+
from pr2md.pr_extractor import GitHubAPIError, GitHubPRExtractor
|
|
9
|
+
|
|
10
|
+
__version__ = "1.0.1"
|
|
11
|
+
__all__ = [
|
|
12
|
+
"Comment",
|
|
13
|
+
"Label",
|
|
14
|
+
"PullRequest",
|
|
15
|
+
"Review",
|
|
16
|
+
"ReviewComment",
|
|
17
|
+
"User",
|
|
18
|
+
"GitHubAPIError",
|
|
19
|
+
"GitHubPRExtractor",
|
|
20
|
+
"MarkdownFormatter",
|
|
21
|
+
]
|
pr2md/__main__.py
ADDED
pr2md/cli.py
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"""Command-line interface for GitHub PR extractor."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import logging
|
|
5
|
+
import re
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
from pr2md.formatter import MarkdownFormatter
|
|
11
|
+
from pr2md.pr_extractor import GitHubAPIError, GitHubPRExtractor
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def setup_logging(verbose: bool = False) -> None:
    """
    Configure the root logger used by the command-line tool.

    Args:
        verbose: Log at DEBUG level when true, otherwise at INFO level.
    """
    if verbose:
        chosen_level = logging.DEBUG
    else:
        chosen_level = logging.INFO

    # Timestamped records that also carry the emitting logger's name.
    record_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    timestamp_format = "%Y-%m-%d %H:%M:%S"

    logging.basicConfig(
        format=record_format,
        datefmt=timestamp_format,
        level=chosen_level,
    )
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def parse_pr_url(url: str) -> tuple[str, str, int]:
    """
    Split a GitHub pull request URL into its identifying parts.

    The URL only has to start with the expected prefix; anything after the
    PR number (for example ``/files``) is ignored.

    Args:
        url: GitHub PR URL

    Returns:
        Tuple of (owner, repo, pr_number)

    Raises:
        ValueError: If the URL does not look like a GitHub PR URL
    """
    pr_url_regex = re.compile(r"https?://github\.com/([^/]+)/([^/]+)/pull/(\d+)")
    found = pr_url_regex.match(url)

    if found is None:
        raise ValueError(
            f"Invalid GitHub PR URL: {url}\n"
            "Expected format: https://github.com/owner/repo/pull/123"
        )

    return str(found.group(1)), str(found.group(2)), int(found.group(3))
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def create_parser() -> argparse.ArgumentParser:
    """
    Build the argument parser for the command-line interface.

    The parser accepts one or more positional PR identifier tokens plus the
    optional ``-o/--output`` and ``-v/--verbose`` flags.

    Returns:
        Configured ArgumentParser
    """
    usage_examples = """
Examples:
%(prog)s https://github.com/owner/repo/pull/123
%(prog)s owner repo 123
%(prog)s https://github.com/owner/repo/pull/123 -o output.md
%(prog)s owner repo 123 --output pr-details.md --verbose
"""
    identifier_help = (
        "GitHub PR URL (https://github.com/owner/repo/pull/123) "
        "or owner repo pr_number"
    )

    cli_parser = argparse.ArgumentParser(
        description="Extract GitHub Pull Request details to Markdown format",
        # Keep the hand-formatted examples block exactly as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )

    # One token means a URL, three mean "owner repo number"; the count is
    # validated later by the caller, so accept any non-empty list here.
    cli_parser.add_argument("pr_identifier", nargs="+", help=identifier_help)
    cli_parser.add_argument(
        "-o",
        "--output",
        type=str,
        help="Output file path (default: stdout)",
        default=None,
    )
    cli_parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Enable verbose logging",
    )

    return cli_parser
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def parse_arguments(
    parser: argparse.ArgumentParser,
) -> tuple[str, str, int, Optional[str], bool]:
    """
    Read the process arguments and resolve them into PR coordinates.

    A single positional token is treated as a PR URL; three tokens are
    treated as ``owner repo pr_number``. Any other count is reported through
    ``parser.error``. A value that cannot be parsed is logged and the process
    exits with status 1.

    Args:
        parser: Argument parser

    Returns:
        Tuple of (owner, repo, pr_number, output_path, verbose)
    """
    namespace = parser.parse_args()
    log = logging.getLogger(__name__)

    # Pre-assigned so static analysis sees the names bound on every path.
    owner: str = ""
    repo: str = ""
    pr_number: int = 0

    try:
        tokens: list[str] = list(namespace.pr_identifier)
        token_count = len(tokens)
        if token_count == 1:
            # Single token: a full PR URL.
            owner, repo, pr_number = parse_pr_url(str(tokens[0]))
        elif token_count == 3:
            # Three tokens: owner, repository name, PR number.
            owner = str(tokens[0])
            repo = str(tokens[1])
            pr_number = int(tokens[2])
        else:
            parser.error(
                "Invalid arguments. Provide either a PR URL or owner repo pr_number"
            )
    except (ValueError, IndexError) as err:
        log.error("Error parsing PR identifier: %s", err)
        sys.exit(1)

    output_path: Optional[str] = None
    if namespace.output:
        output_path = str(namespace.output)
    verbose: bool = bool(namespace.verbose)

    return owner, repo, pr_number, output_path, verbose
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def extract_pr_data(
    owner: str, repo: str, pr_number: int, verbose: bool
) -> tuple[str, bool]:
    """
    Fetch a pull request and render it as Markdown.

    Failures are logged rather than raised; the caller learns about them
    through the boolean in the returned tuple.

    Args:
        owner: Repository owner
        repo: Repository name
        pr_number: PR number
        verbose: Also log the full traceback for unexpected errors

    Returns:
        Tuple of (markdown, success); markdown is empty when success is False
    """
    log = logging.getLogger(__name__)
    failure: tuple[str, bool] = ("", False)

    # Stage 1: download everything the formatter needs.
    try:
        pull_request, comments, reviews, review_comments, diff = GitHubPRExtractor(
            owner, repo, pr_number
        ).extract_all()
    except GitHubAPIError as err:
        log.error("GitHub API error: %s", err)
        return failure
    except Exception as err:  # pylint: disable=broad-exception-caught
        log.error("Unexpected error: %s", err)
        if verbose:
            log.exception("Full traceback:")
        return failure

    # Stage 2: turn the downloaded data into Markdown.
    try:
        rendered = MarkdownFormatter.format_pr(
            pull_request, comments, reviews, review_comments, diff
        )
    except Exception as err:  # pylint: disable=broad-exception-caught
        log.error("Error formatting data: %s", err)
        if verbose:
            log.exception("Full traceback:")
        return failure

    return rendered, True
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def write_output(markdown: str, output_path: Optional[str], verbose: bool) -> bool:
    """
    Deliver the rendered Markdown to a file or to standard output.

    Args:
        markdown: Formatted markdown string
        output_path: Destination file; when empty or None the text is printed
        verbose: Also log the full traceback when writing fails

    Returns:
        True if successful, False otherwise
    """
    log = logging.getLogger(__name__)

    try:
        if not output_path:
            print(markdown)  # noqa: T201
        else:
            destination = Path(output_path)
            destination.write_text(markdown, encoding="utf-8")
            log.info("Output written to %s", output_path)
    except Exception as err:  # pylint: disable=broad-exception-caught
        log.error("Error writing output: %s", err)
        if verbose:
            log.exception("Full traceback:")
        return False

    return True
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def main() -> None:
    """
    Run the CLI: parse arguments, fetch the PR, and emit Markdown.

    Exits with status 1 when either extraction or writing reports failure.
    """
    owner, repo, pr_number, output_path, verbose = parse_arguments(create_parser())

    setup_logging(verbose)
    log = logging.getLogger(__name__)
    log.info("Extracting PR %s/%s #%d", owner, repo, pr_number)

    markdown, extracted = extract_pr_data(owner, repo, pr_number, verbose)
    if not extracted:
        sys.exit(1)

    written = write_output(markdown, output_path, verbose)
    if not written:
        sys.exit(1)

    log.info("Extraction completed successfully")
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
if __name__ == "__main__":
|
|
236
|
+
main()
|
pr2md/formatter.py
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
"""Markdown formatter for GitHub PR data."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
|
|
6
|
+
from pr2md.models import Comment, PullRequest, Review, ReviewComment
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MarkdownFormatter:
    """Format GitHub PR data as Markdown."""

    # Emoji shown in front of each review heading, keyed by review state.
    # States that are not listed get an empty prefix.
    _STATE_EMOJI: dict[str, str] = {
        "APPROVED": "✅",
        "CHANGES_REQUESTED": "🔴",
        "COMMENTED": "💬",
        "DISMISSED": "🚫",
        "PENDING": "⏳",
    }

    @staticmethod
    def format_pr(
        pull_request: PullRequest,
        comments: list[Comment],
        reviews: list[Review],
        review_comments: list[ReviewComment],
        diff: str,
    ) -> str:
        """
        Format all PR data as Markdown.

        Args:
            pull_request: Pull request object
            comments: List of comments
            reviews: List of reviews
            review_comments: List of review comments
            diff: Diff string

        Returns:
            Formatted Markdown string
        """
        logger.info("Formatting PR data as Markdown")
        sections = [
            MarkdownFormatter._format_header(pull_request),
            MarkdownFormatter._format_description(pull_request),
            MarkdownFormatter._format_changes_summary(pull_request),
            MarkdownFormatter._format_diff(diff),
            MarkdownFormatter._format_conversation(comments),
            MarkdownFormatter._format_reviews(reviews),
            MarkdownFormatter._format_review_comments(review_comments),
        ]

        result = "\n\n".join(sections)
        logger.info("Formatted Markdown (%d characters)", len(result))
        return result

    @staticmethod
    def _fence_diff(text: str) -> str:
        """
        Wrap text in a ``diff`` code fence that the text cannot close early.

        A fenced block ends at the first line that, after optional
        indentation, starts with at least as many backticks as the opening
        fence. A diff of a Markdown file can contain such a line (a context
        line is the original line prefixed with one space), so the fence is
        made one backtick longer than the longest such run, never shorter
        than three.
        """
        longest_run = 0
        for line in text.splitlines():
            stripped = line.lstrip()
            run = len(stripped) - len(stripped.lstrip("`"))
            longest_run = max(longest_run, run)

        fence = "`" * max(3, longest_run + 1)
        return f"{fence}diff\n{text}\n{fence}"

    @staticmethod
    def _format_header(pull_request: PullRequest) -> str:
        """Format PR header section."""
        status = pull_request.state.upper()
        if pull_request.merged_at:
            # A merged PR is reported as MERGED rather than by its raw state.
            status = "MERGED"

        labels_str = ""
        if pull_request.labels:
            label_names = ", ".join(
                [f"`{label.name}`" for label in pull_request.labels]
            )
            labels_str = f"\n**Labels:** {label_names}"

        closed_str = ""
        if pull_request.closed_at:
            closed_time = pull_request.closed_at.strftime("%Y-%m-%d %H:%M:%S UTC")
            closed_str = f"\n**Closed:** {closed_time}"

        merged_str = ""
        if pull_request.merged_at:
            merged_time = pull_request.merged_at.strftime("%Y-%m-%d %H:%M:%S UTC")
            merged_str = f"\n**Merged:** {merged_time}"

        created_time = pull_request.created_at.strftime("%Y-%m-%d %H:%M:%S UTC")
        updated_time = pull_request.updated_at.strftime("%Y-%m-%d %H:%M:%S UTC")

        return f"""# {pull_request.title}

**PR Number:** #{pull_request.number}
**Status:** {status}
**Author:** [{pull_request.user.login}]({pull_request.user.html_url})
**Created:** {created_time}
**Updated:** {updated_time}{closed_str}{merged_str}
**URL:** {pull_request.html_url}
**Base:** `{pull_request.base_ref}` (`{pull_request.base_sha[:7]}`)
**Head:** `{pull_request.head_ref}` (`{pull_request.head_sha[:7]}`){labels_str}"""

    @staticmethod
    def _format_description(pull_request: PullRequest) -> str:
        """Format PR description section."""
        if not pull_request.body:
            return "## Description\n\n*No description provided.*"
        return f"## Description\n\n{pull_request.body}"

    @staticmethod
    def _format_changes_summary(pull_request: PullRequest) -> str:
        """Format changes summary section."""
        return f"""## Changes Summary

- **Files changed:** {pull_request.changed_files}
- **Additions:** +{pull_request.additions}
- **Deletions:** -{pull_request.deletions}"""

    @staticmethod
    def _format_diff(diff: str) -> str:
        """Format diff section."""
        if not diff:
            return "## Code Diff\n\n*No diff available.*"

        return "## Code Diff\n\n" + MarkdownFormatter._fence_diff(diff)

    @staticmethod
    def _format_conversation(comments: list[Comment]) -> str:
        """Format conversation thread section."""
        if not comments:
            return "## Conversation Thread\n\n*No comments in the conversation thread.*"

        # Sort by creation time
        sorted_comments = sorted(comments, key=lambda c: c.created_at)

        formatted_comments = []
        for comment in sorted_comments:
            comment_time = comment.created_at.strftime("%Y-%m-%d %H:%M:%S UTC")
            # pylint: disable=line-too-long
            formatted_comment = f"""### [{comment.user.login}]({comment.user.html_url}) commented on {comment_time}

{comment.body}

*[View on GitHub]({comment.html_url})*"""
            # pylint: enable=line-too-long
            formatted_comments.append(formatted_comment)

        return "## Conversation Thread\n\n" + "\n\n---\n\n".join(formatted_comments)

    @staticmethod
    def _review_sort_key(review: Review) -> tuple:
        """
        Return a sort key that keeps dated and undated reviews comparable.

        The leading flag decides the order between the two groups, so a
        datetime is never compared with a login string. Dated reviews come
        first in submission order; undated ones follow, ordered by login.
        """
        if review.submitted_at is None:
            return (True, review.user.login)
        return (False, review.submitted_at)

    @staticmethod
    def _format_reviews(reviews: list[Review]) -> str:
        """Format reviews section."""
        if not reviews:
            return "## Reviews\n\n*No reviews submitted.*"

        # Sort by submission time; reviews without one are listed last.
        sorted_reviews = sorted(reviews, key=MarkdownFormatter._review_sort_key)

        formatted_reviews = []
        for review in sorted_reviews:
            submitted_str = (
                review.submitted_at.strftime("%Y-%m-%d %H:%M:%S UTC")
                if review.submitted_at
                else "Unknown date"
            )

            emoji = MarkdownFormatter._STATE_EMOJI.get(review.state, "")

            body_str = review.body if review.body else "*No comment provided.*"

            # pylint: disable=line-too-long
            formatted_review = f"""### {emoji} [{review.user.login}]({review.user.html_url}) {review.state.replace("_", " ")} on {submitted_str}

{body_str}

*[View on GitHub]({review.html_url})*"""
            # pylint: enable=line-too-long
            formatted_reviews.append(formatted_review)

        return "## Reviews\n\n" + "\n\n---\n\n".join(formatted_reviews)

    @staticmethod
    def _format_review_comments(review_comments: list[ReviewComment]) -> str:
        """Format review comments section."""
        if not review_comments:
            return "## Review Comments (Code Comments)\n\n*No review comments on code.*"

        # Group by file path
        comments_by_file: dict[str, list[ReviewComment]] = defaultdict(list)
        for comment in review_comments:
            comments_by_file[comment.path].append(comment)

        # Sort files alphabetically
        sorted_files = sorted(comments_by_file.keys())

        formatted_files = []
        for file_path in sorted_files:
            file_comments = sorted(
                comments_by_file[file_path], key=lambda c: c.created_at
            )

            formatted_comments = []
            for comment in file_comments:
                # Check if this is a reply
                reply_str = ""
                if comment.in_reply_to_id:
                    reply_str = f" *(in reply to comment #{comment.in_reply_to_id})*"

                comment_time = comment.created_at.strftime("%Y-%m-%d %H:%M:%S UTC")
                fenced_hunk = MarkdownFormatter._fence_diff(comment.diff_hunk)
                # pylint: disable=line-too-long
                formatted_comment = f"""#### [{comment.user.login}]({comment.user.html_url}) commented on {comment_time}{reply_str}

**Code context:**
{fenced_hunk}

**Comment:**
{comment.body}

*[View on GitHub]({comment.html_url})*"""
                # pylint: enable=line-too-long
                formatted_comments.append(formatted_comment)

            file_section = f"""### File: `{file_path}`

{chr(10).join(formatted_comments)}"""
            formatted_files.append(file_section)

        return "## Review Comments (Code Comments)\n\n" + "\n\n---\n\n".join(
            formatted_files
        )
|
pr2md/models.py
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
"""Data models for GitHub PR extraction."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Any, Optional
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class User:
    """Account details of a GitHub user as returned by the API."""

    login: str
    id: int
    avatar_url: str
    html_url: str

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "User":
        """
        Build a User from an API response dictionary.

        Every key is required; values are coerced to the field's type.
        """
        login = str(data["login"])
        user_id = int(data["id"])
        avatar_url = str(data["avatar_url"])
        html_url = str(data["html_url"])
        return cls(login, user_id, avatar_url, html_url)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class Label:
    """A label attached to a GitHub issue or pull request."""

    name: str
    color: str
    description: Optional[str]

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Label":
        """
        Build a Label from an API response dictionary.

        ``name`` and ``color`` are required; a missing or null
        ``description`` is kept as None.
        """
        name = str(data["name"])
        color = str(data["color"])

        raw_description = data.get("description")
        description: Optional[str]
        if raw_description is None:
            description = None
        else:
            description = str(data["description"])

        return cls(name=name, color=color, description=description)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
class Comment:
    """A comment in the conversation thread of an issue or pull request."""

    id: int
    user: User
    body: str
    created_at: datetime
    updated_at: datetime
    html_url: str

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Comment":
        """
        Build a Comment from an API response dictionary.

        Timestamps are parsed from ISO 8601 text; a trailing ``Z`` is
        rewritten to ``+00:00`` before parsing.
        """

        def _timestamp(key: str) -> datetime:
            return datetime.fromisoformat(str(data[key]).replace("Z", "+00:00"))

        comment_id = int(data["id"])
        author = User.from_dict(dict(data["user"]))
        text = str(data["body"])
        created = _timestamp("created_at")
        updated = _timestamp("updated_at")
        link = str(data["html_url"])

        return cls(
            id=comment_id,
            user=author,
            body=text,
            created_at=created,
            updated_at=updated,
            html_url=link,
        )
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass
class ReviewComment:
    """GitHub review comment (inline code comment)."""

    id: int
    user: User
    body: str
    path: str
    position: Optional[int]
    original_position: Optional[int]
    commit_id: str
    original_commit_id: str
    diff_hunk: str
    created_at: datetime
    updated_at: datetime
    html_url: str
    in_reply_to_id: Optional[int]

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ReviewComment":
        """
        Create ReviewComment from API response dictionary.

        Optional integer fields (``position``, ``original_position``,
        ``in_reply_to_id``) become None only when the key is missing or
        null. A value of ``0`` is preserved instead of being mistaken for
        "absent", matching how the other models test optional fields.
        """

        def _optional_int(key: str) -> Optional[int]:
            # Compare against None explicitly: 0 is falsy but still a value.
            value = data.get(key)
            return None if value is None else int(value)

        def _timestamp(key: str) -> datetime:
            # Rewrite a trailing "Z" so fromisoformat accepts the value.
            return datetime.fromisoformat(str(data[key]).replace("Z", "+00:00"))

        return cls(
            id=int(data["id"]),
            user=User.from_dict(dict(data["user"])),
            body=str(data["body"]),
            path=str(data["path"]),
            position=_optional_int("position"),
            original_position=_optional_int("original_position"),
            commit_id=str(data["commit_id"]),
            original_commit_id=str(data["original_commit_id"]),
            diff_hunk=str(data["diff_hunk"]),
            created_at=_timestamp("created_at"),
            updated_at=_timestamp("updated_at"),
            html_url=str(data["html_url"]),
            in_reply_to_id=_optional_int("in_reply_to_id"),
        )
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@dataclass
class Review:
    """A review submitted on a GitHub pull request."""

    id: int
    user: User
    body: Optional[str]
    state: str
    html_url: str
    submitted_at: Optional[datetime]
    commit_id: str

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Review":
        """
        Build a Review from an API response dictionary.

        ``submitted_at`` stays None when the key is missing or empty;
        ``body`` stays None when the key is missing or null.
        """
        raw_submitted = data.get("submitted_at")
        submitted_at: Optional[datetime]
        if raw_submitted:
            # Rewrite a trailing "Z" so fromisoformat accepts the value.
            submitted_at = datetime.fromisoformat(
                str(data["submitted_at"]).replace("Z", "+00:00")
            )
        else:
            submitted_at = None

        review_id = int(data["id"])
        reviewer = User.from_dict(dict(data["user"]))
        body = None if data.get("body") is None else str(data["body"])
        state = str(data["state"])
        link = str(data["html_url"])
        commit = str(data["commit_id"])

        return cls(
            id=review_id,
            user=reviewer,
            body=body,
            state=state,
            html_url=link,
            submitted_at=submitted_at,
            commit_id=commit,
        )
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
@dataclass
class PullRequest:
    """Details of a single GitHub pull request."""

    number: int
    title: str
    body: Optional[str]
    state: str
    user: User
    created_at: datetime
    updated_at: datetime
    closed_at: Optional[datetime]
    merged_at: Optional[datetime]
    merge_commit_sha: Optional[str]
    html_url: str
    labels: list[Label]
    additions: int
    deletions: int
    changed_files: int
    head_ref: str
    base_ref: str
    head_sha: str
    base_sha: str

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "PullRequest":
        """
        Build a PullRequest from an API response dictionary.

        ``closed_at`` and ``merged_at`` stay None when missing or empty;
        ``body`` and ``merge_commit_sha`` stay None when missing or null.
        Branch names and SHAs are read from the nested ``head`` and ``base``
        objects.
        """

        def _timestamp(key: str) -> datetime:
            # Rewrite a trailing "Z" so fromisoformat accepts the value.
            return datetime.fromisoformat(str(data[key]).replace("Z", "+00:00"))

        closed_at = _timestamp("closed_at") if data.get("closed_at") else None
        merged_at = _timestamp("merged_at") if data.get("merged_at") else None

        number = int(data["number"])
        title = str(data["title"])
        body = None if data.get("body") is None else str(data["body"])
        state = str(data["state"])
        author = User.from_dict(dict(data["user"]))
        created_at = _timestamp("created_at")
        updated_at = _timestamp("updated_at")

        merge_commit_sha = None
        if data.get("merge_commit_sha") is not None:
            merge_commit_sha = str(data["merge_commit_sha"])

        html_url = str(data["html_url"])

        labels = []
        for raw_label in list(data["labels"]):
            labels.append(Label.from_dict(dict(raw_label)))

        additions = int(data["additions"])
        deletions = int(data["deletions"])
        changed_files = int(data["changed_files"])
        head_ref = str(data["head"]["ref"])
        base_ref = str(data["base"]["ref"])
        head_sha = str(data["head"]["sha"])
        base_sha = str(data["base"]["sha"])

        return cls(
            number=number,
            title=title,
            body=body,
            state=state,
            user=author,
            created_at=created_at,
            updated_at=updated_at,
            closed_at=closed_at,
            merged_at=merged_at,
            merge_commit_sha=merge_commit_sha,
            html_url=html_url,
            labels=labels,
            additions=additions,
            deletions=deletions,
            changed_files=changed_files,
            head_ref=head_ref,
            base_ref=base_ref,
            head_sha=head_sha,
            base_sha=base_sha,
        )
|
pr2md/pr_extractor.py
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
"""GitHub Pull Request data extraction."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Any, Optional
|
|
5
|
+
|
|
6
|
+
import requests
|
|
7
|
+
|
|
8
|
+
from pr2md.models import Comment, PullRequest, Review, ReviewComment
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class GitHubAPIError(Exception):
    """Signals that a request to the GitHub API did not succeed."""
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class GitHubPRExtractor:
|
|
18
|
+
"""Extract Pull Request data from GitHub API."""
|
|
19
|
+
|
|
20
|
+
def __init__(self, owner: str, repo: str, pr_number: int) -> None:
|
|
21
|
+
"""
|
|
22
|
+
Initialize the PR extractor.
|
|
23
|
+
|
|
24
|
+
Args:
|
|
25
|
+
owner: Repository owner
|
|
26
|
+
repo: Repository name
|
|
27
|
+
pr_number: Pull request number
|
|
28
|
+
"""
|
|
29
|
+
self.owner = owner
|
|
30
|
+
self.repo = repo
|
|
31
|
+
self.pr_number = pr_number
|
|
32
|
+
self.base_url = "https://api.github.com"
|
|
33
|
+
self.session = requests.Session()
|
|
34
|
+
self.session.headers.update(
|
|
35
|
+
{
|
|
36
|
+
"Accept": "application/vnd.github.v3+json",
|
|
37
|
+
"User-Agent": "GitHub-PR-Extractor",
|
|
38
|
+
}
|
|
39
|
+
)
|
|
40
|
+
logger.info("Initialized extractor for %s/%s PR #%d", owner, repo, pr_number)
|
|
41
|
+
|
|
42
|
+
def _make_request(self, endpoint: str, accept_header: Optional[str] = None) -> Any:
|
|
43
|
+
"""
|
|
44
|
+
Make a request to the GitHub API.
|
|
45
|
+
|
|
46
|
+
Args:
|
|
47
|
+
endpoint: API endpoint path
|
|
48
|
+
accept_header: Optional custom Accept header
|
|
49
|
+
|
|
50
|
+
Returns:
|
|
51
|
+
Response data (JSON or text)
|
|
52
|
+
|
|
53
|
+
Raises:
|
|
54
|
+
GitHubAPIError: If the request fails
|
|
55
|
+
"""
|
|
56
|
+
url = f"{self.base_url}{endpoint}"
|
|
57
|
+
headers = {}
|
|
58
|
+
if accept_header:
|
|
59
|
+
headers["Accept"] = accept_header
|
|
60
|
+
|
|
61
|
+
logger.debug("Making request to %s", url)
|
|
62
|
+
response = self.session.get(url, headers=headers, timeout=30)
|
|
63
|
+
|
|
64
|
+
if response.status_code == 404:
|
|
65
|
+
raise GitHubAPIError(
|
|
66
|
+
f"Resource not found: {url}. "
|
|
67
|
+
"Please check that the repository and PR number are correct."
|
|
68
|
+
)
|
|
69
|
+
if response.status_code == 403:
|
|
70
|
+
# Check if it's rate limiting
|
|
71
|
+
if "rate limit" in response.text.lower():
|
|
72
|
+
raise GitHubAPIError(
|
|
73
|
+
"GitHub API rate limit exceeded. "
|
|
74
|
+
"Please try again later or use authentication."
|
|
75
|
+
)
|
|
76
|
+
raise GitHubAPIError(f"Access forbidden: {url}")
|
|
77
|
+
if response.status_code != 200:
|
|
78
|
+
raise GitHubAPIError(
|
|
79
|
+
f"GitHub API request failed with status {response.status_code}: "
|
|
80
|
+
f"{response.text}"
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
if accept_header and "diff" in accept_header:
|
|
84
|
+
return str(response.text)
|
|
85
|
+
return response.json()
|
|
86
|
+
|
|
87
|
+
def fetch_pr_details(self) -> PullRequest:
|
|
88
|
+
"""
|
|
89
|
+
Fetch pull request details.
|
|
90
|
+
|
|
91
|
+
Returns:
|
|
92
|
+
PullRequest object
|
|
93
|
+
|
|
94
|
+
Raises:
|
|
95
|
+
GitHubAPIError: If the request fails
|
|
96
|
+
"""
|
|
97
|
+
logger.info("Fetching PR details")
|
|
98
|
+
endpoint = f"/repos/{self.owner}/{self.repo}/pulls/{self.pr_number}"
|
|
99
|
+
data: dict[str, Any] = self._make_request(endpoint)
|
|
100
|
+
return PullRequest.from_dict(data)
|
|
101
|
+
|
|
102
|
+
def fetch_comments(self) -> list[Comment]:
|
|
103
|
+
"""
|
|
104
|
+
Fetch issue/PR comments (conversation thread).
|
|
105
|
+
|
|
106
|
+
Returns:
|
|
107
|
+
List of Comment objects
|
|
108
|
+
|
|
109
|
+
Raises:
|
|
110
|
+
GitHubAPIError: If the request fails
|
|
111
|
+
"""
|
|
112
|
+
logger.info("Fetching comments")
|
|
113
|
+
endpoint = f"/repos/{self.owner}/{self.repo}/issues/{self.pr_number}/comments"
|
|
114
|
+
data: list[dict[str, Any]] = self._make_request(endpoint)
|
|
115
|
+
comments = [Comment.from_dict(dict(comment)) for comment in data]
|
|
116
|
+
logger.info("Found %d comments", len(comments))
|
|
117
|
+
return comments
|
|
118
|
+
|
|
119
|
+
def fetch_review_comments(self) -> list[ReviewComment]:
|
|
120
|
+
"""
|
|
121
|
+
Fetch review comments (inline code comments).
|
|
122
|
+
|
|
123
|
+
Returns:
|
|
124
|
+
List of ReviewComment objects
|
|
125
|
+
|
|
126
|
+
Raises:
|
|
127
|
+
GitHubAPIError: If the request fails
|
|
128
|
+
"""
|
|
129
|
+
logger.info("Fetching review comments")
|
|
130
|
+
endpoint = f"/repos/{self.owner}/{self.repo}/pulls/{self.pr_number}/comments"
|
|
131
|
+
data: list[dict[str, Any]] = self._make_request(endpoint)
|
|
132
|
+
review_comments = [ReviewComment.from_dict(dict(comment)) for comment in data]
|
|
133
|
+
logger.info("Found %d review comments", len(review_comments))
|
|
134
|
+
return review_comments
|
|
135
|
+
|
|
136
|
+
def fetch_reviews(self) -> list[Review]:
|
|
137
|
+
"""
|
|
138
|
+
Fetch PR reviews.
|
|
139
|
+
|
|
140
|
+
Returns:
|
|
141
|
+
List of Review objects
|
|
142
|
+
|
|
143
|
+
Raises:
|
|
144
|
+
GitHubAPIError: If the request fails
|
|
145
|
+
"""
|
|
146
|
+
logger.info("Fetching reviews")
|
|
147
|
+
endpoint = f"/repos/{self.owner}/{self.repo}/pulls/{self.pr_number}/reviews"
|
|
148
|
+
data: list[dict[str, Any]] = self._make_request(endpoint)
|
|
149
|
+
reviews = [Review.from_dict(dict(review)) for review in data]
|
|
150
|
+
logger.info("Found %d reviews", len(reviews))
|
|
151
|
+
return reviews
|
|
152
|
+
|
|
153
|
+
def fetch_diff(self) -> str:
    """
    Retrieve the pull request as a diff.

    The pull request endpoint is queried with the diff media type passed as
    the Accept header; whatever the request helper returns is handed back.

    Returns:
        Diff as a string

    Raises:
        GitHubAPIError: If the request fails
    """
    logger.info("Fetching diff")
    diff_media_type = "application/vnd.github.v3.diff"
    pr_endpoint = f"/repos/{self.owner}/{self.repo}/pulls/{self.pr_number}"
    diff_text: str = self._make_request(pr_endpoint, accept_header=diff_media_type)
    logger.info("Fetched diff (%d bytes)", len(diff_text))
    return diff_text
|
|
170
|
+
|
|
171
|
+
def extract_all(
    self,
) -> tuple[PullRequest, list[Comment], list[Review], list[ReviewComment], str]:
    """
    Gather every piece of PR data in a single call.

    The individual fetchers run one after another in a fixed order: PR
    details, conversation comments, reviews, inline review comments, diff.

    Returns:
        Tuple of (PullRequest, comments, reviews, review_comments, diff)

    Raises:
        GitHubAPIError: If any request fails
    """
    logger.info("Extracting all PR data")
    # Tuple elements are evaluated left to right, so the fetch order is kept.
    extracted = (
        self.fetch_pr_details(),
        self.fetch_comments(),
        self.fetch_reviews(),
        self.fetch_review_comments(),
        self.fetch_diff(),
    )
    logger.info("Successfully extracted all PR data")
    return extracted
|
pr2md/py.typed
ADDED
|
File without changes
|
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: PR2MD
|
|
3
|
+
Version: 1.0.1
|
|
4
|
+
Summary: Pull Request Markdown Generator
|
|
5
|
+
Author: tboy1337
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/tboy1337/PR2MD
|
|
8
|
+
Project-URL: Repository, https://github.com/tboy1337/PR2MD
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Topic :: Software Development :: Version Control :: Git
|
|
16
|
+
Classifier: Typing :: Typed
|
|
17
|
+
Requires-Python: >=3.13
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE.md
|
|
20
|
+
Dynamic: license-file
|
|
21
|
+
|
|
22
|
+
# PR2MD - Pull Request to Markdown
|
|
23
|
+
|
|
24
|
+
[Python 3.13+](https://www.python.org/downloads/)
|
|
25
|
+
[License: CRL 1.1](LICENSE.md)
|
|
26
|
+
|
|
27
|
+
**PR2MD** is a powerful command-line tool that extracts GitHub Pull Request data and converts it into comprehensive, well-formatted Markdown documents. Perfect for documentation, archiving, code reviews, or offline analysis of pull requests.
|
|
28
|
+
|
|
29
|
+
## Features
|
|
30
|
+
|
|
31
|
+
- 📥 **Complete PR Data Extraction**: Retrieves all PR details including metadata, description, labels, and timestamps
|
|
32
|
+
- 💬 **Full Conversation Thread**: Captures all comments and discussions in chronological order
|
|
33
|
+
- ✅ **Review Information**: Includes all code reviews with approval status and reviewer comments
|
|
34
|
+
- 💻 **Code Comments**: Extracts inline review comments with their associated code context
|
|
35
|
+
- 📊 **Change Statistics**: Displays files changed, additions, deletions, and commit information
|
|
36
|
+
- 🔍 **Complete Diffs**: Includes the full unified diff of all changes
|
|
37
|
+
- 🎨 **Beautiful Formatting**: Generates clean, readable Markdown with proper structure and syntax highlighting
|
|
38
|
+
- ⚡ **Fast & Efficient**: Uses the official GitHub REST API with proper error handling
|
|
39
|
+
- 🔒 **Type-Safe**: Written in Python with comprehensive type annotations
|
|
40
|
+
|
|
41
|
+
## Installation
|
|
42
|
+
|
|
43
|
+
### From Source
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
# Clone the repository
|
|
47
|
+
git clone https://github.com/tboy1337/PR2MD.git
|
|
48
|
+
cd PR2MD
|
|
49
|
+
|
|
50
|
+
# Install dependencies
|
|
51
|
+
pip install -r requirements.txt
|
|
52
|
+
|
|
53
|
+
# Install the package
|
|
54
|
+
pip install -e .
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### Requirements
|
|
58
|
+
|
|
59
|
+
- Python 3.13 or higher
|
|
60
|
+
- `requests` library (for GitHub API communication)
|
|
61
|
+
|
|
62
|
+
## Usage
|
|
63
|
+
|
|
64
|
+
### Basic Usage
|
|
65
|
+
|
|
66
|
+
Extract a PR using its URL:
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
pr2md https://github.com/owner/repo/pull/123
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Or specify the owner, repository, and PR number separately:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
pr2md owner repo 123
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### Save to File
|
|
79
|
+
|
|
80
|
+
Output the Markdown to a file instead of stdout:
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
pr2md https://github.com/owner/repo/pull/123 -o pr-details.md
|
|
84
|
+
pr2md owner repo 123 --output pr-analysis.md
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### Verbose Logging
|
|
88
|
+
|
|
89
|
+
Enable detailed logging for debugging:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
pr2md https://github.com/owner/repo/pull/123 --verbose
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### Help
|
|
96
|
+
|
|
97
|
+
View all available options:
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
pr2md --help
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Output Format
|
|
104
|
+
|
|
105
|
+
The generated Markdown document includes:
|
|
106
|
+
|
|
107
|
+
### 1. PR Header
|
|
108
|
+
- PR number, title, and status (Open/Closed/Merged)
|
|
109
|
+
- Author information with GitHub profile link
|
|
110
|
+
- Creation, update, closed, and merged timestamps
|
|
111
|
+
- Base and head branch information with commit SHAs
|
|
112
|
+
- Labels (if any)
|
|
113
|
+
|
|
114
|
+
### 2. Description
|
|
115
|
+
- The full PR description/body
|
|
116
|
+
|
|
117
|
+
### 3. Changes Summary
|
|
118
|
+
- Number of files changed
|
|
119
|
+
- Line additions and deletions
|
|
120
|
+
|
|
121
|
+
### 4. Code Diff
|
|
122
|
+
- Complete unified diff of all changes
|
|
123
|
+
- Syntax-highlighted code blocks
|
|
124
|
+
|
|
125
|
+
### 5. Conversation Thread
|
|
126
|
+
- All comments from the PR discussion
|
|
127
|
+
- Chronologically sorted
|
|
128
|
+
- Author attribution and timestamps
|
|
129
|
+
- Links back to GitHub
|
|
130
|
+
|
|
131
|
+
### 6. Reviews
|
|
132
|
+
- All submitted reviews
|
|
133
|
+
- Review state (Approved ✅, Changes Requested 🔴, Commented 💬, etc.)
|
|
134
|
+
- Review comments and timestamps
|
|
135
|
+
|
|
136
|
+
### 7. Review Comments (Code Comments)
|
|
137
|
+
- Inline code review comments
|
|
138
|
+
- Grouped by file
|
|
139
|
+
- Includes code context (diff hunk)
|
|
140
|
+
- Reply chains preserved
|
|
141
|
+
|
|
142
|
+
## Example
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
# Extract PR #42 from the PR2MD repository
|
|
146
|
+
pr2md tboy1337 PR2MD 42 -o pr-42.md
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
This creates a file `pr-42.md` containing all the PR information in a beautifully formatted Markdown document.
|
|
150
|
+
|
|
151
|
+
## GitHub API Rate Limiting
|
|
152
|
+
|
|
153
|
+
The tool currently uses the GitHub REST API without authentication. GitHub imposes the following rate limits:
|
|
154
|
+
|
|
155
|
+
- **Unauthenticated requests**: 60 requests per hour
|
|
156
|
+
- **Authenticated requests**: 5,000 requests per hour
|
|
157
|
+
|
|
158
|
+
For most use cases, unauthenticated access is sufficient as the tool makes only a few API calls per PR. If you encounter rate limiting issues, the tool will provide clear error messages.
|
|
159
|
+
|
|
160
|
+
**Future Enhancement**: Authentication support is planned for a future release to enable higher rate limits and access to private repositories.
|
|
161
|
+
|
|
162
|
+
## Development
|
|
163
|
+
|
|
164
|
+
### Setup Development Environment
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
# Clone the repository
|
|
168
|
+
git clone https://github.com/tboy1337/PR2MD.git
|
|
169
|
+
cd PR2MD
|
|
170
|
+
|
|
171
|
+
# Install development dependencies
|
|
172
|
+
pip install -r requirements-dev.txt
|
|
173
|
+
|
|
174
|
+
# Install the package in editable mode
|
|
175
|
+
pip install -e .
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### Running Tests
|
|
179
|
+
|
|
180
|
+
The project includes comprehensive tests using pytest:
|
|
181
|
+
|
|
182
|
+
```bash
|
|
183
|
+
# Run all tests
|
|
184
|
+
pytest
|
|
185
|
+
|
|
186
|
+
# Run with coverage
|
|
187
|
+
pytest --cov=pr2md --cov-report=html
|
|
188
|
+
|
|
189
|
+
# Run specific test file
|
|
190
|
+
pytest tests/test_cli.py
|
|
191
|
+
|
|
192
|
+
# Run with verbose output
|
|
193
|
+
pytest -v
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### Code Quality
|
|
197
|
+
|
|
198
|
+
The project maintains high code quality standards:
|
|
199
|
+
|
|
200
|
+
```bash
|
|
201
|
+
# Type checking with mypy
|
|
202
|
+
mypy src/pr2md
|
|
203
|
+
|
|
204
|
+
# Linting with pylint
|
|
205
|
+
pylint src/pr2md
|
|
206
|
+
|
|
207
|
+
# Code formatting with black
|
|
208
|
+
black src/pr2md tests
|
|
209
|
+
|
|
210
|
+
# Import sorting with isort
|
|
211
|
+
isort src/pr2md tests
|
|
212
|
+
|
|
213
|
+
# Remove trailing whitespace
|
|
214
|
+
py -m autopep8 --in-place --select=W291,W293 src tests
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
### Project Structure
|
|
218
|
+
|
|
219
|
+
```
|
|
220
|
+
PR2MD/
|
|
221
|
+
├── src/
|
|
222
|
+
│ └── pr2md/
|
|
223
|
+
│ ├── __init__.py
|
|
224
|
+
│ ├── __main__.py # Entry point
|
|
225
|
+
│ ├── cli.py # Command-line interface
|
|
226
|
+
│ ├── models.py # Data models
|
|
227
|
+
│ ├── pr_extractor.py # GitHub API client
|
|
228
|
+
│ ├── formatter.py # Markdown formatter
|
|
229
|
+
│ └── py.typed # Type checking marker
|
|
230
|
+
├── tests/ # Comprehensive test suite
|
|
231
|
+
├── pyproject.toml # Project configuration
|
|
232
|
+
├── requirements.txt # Runtime dependencies
|
|
233
|
+
├── requirements-dev.txt # Development dependencies
|
|
234
|
+
├── mypy.ini # Type checking configuration
|
|
235
|
+
├── pytest.ini # Test configuration
|
|
236
|
+
└── README.md # This file
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
## Architecture
|
|
240
|
+
|
|
241
|
+
### Core Components
|
|
242
|
+
|
|
243
|
+
1. **CLI Module** (`cli.py`): Handles command-line argument parsing, logging setup, and orchestrates the extraction and formatting process.
|
|
244
|
+
|
|
245
|
+
2. **PR Extractor** (`pr_extractor.py`): Communicates with the GitHub REST API to fetch PR data, comments, reviews, and diffs. Includes comprehensive error handling.
|
|
246
|
+
|
|
247
|
+
3. **Models** (`models.py`): Type-safe data classes representing GitHub entities (PullRequest, Comment, Review, ReviewComment, User, Label).
|
|
248
|
+
|
|
249
|
+
4. **Formatter** (`formatter.py`): Converts structured PR data into beautifully formatted Markdown with proper sections and syntax highlighting.
|
|
250
|
+
|
|
251
|
+
### Design Principles
|
|
252
|
+
|
|
253
|
+
- **Type Safety**: Full type annotations throughout the codebase
|
|
254
|
+
- **Error Handling**: Graceful handling of API errors and edge cases
|
|
255
|
+
- **Logging**: Comprehensive logging for debugging and monitoring
|
|
256
|
+
- **Testability**: Modular design with clear separation of concerns
|
|
257
|
+
- **Extensibility**: Easy to add new features or output formats
|
|
258
|
+
|
|
259
|
+
## Use Cases
|
|
260
|
+
|
|
261
|
+
- **Code Review Documentation**: Archive code reviews for compliance or historical reference
|
|
262
|
+
- **Offline Analysis**: Review previously exported PRs without internet connectivity
|
|
263
|
+
- **Pull Request Templates**: Learn from well-structured PRs
|
|
264
|
+
- **Change Management**: Document significant changes in projects
|
|
265
|
+
- **Training Materials**: Create educational resources from real-world code reviews
|
|
266
|
+
- **Audit Trails**: Maintain records of development decisions
|
|
267
|
+
- **Report Generation**: Include PR details in project reports
|
|
268
|
+
|
|
269
|
+
## Limitations
|
|
270
|
+
|
|
271
|
+
- Currently supports only public GitHub repositories (authentication coming soon)
|
|
272
|
+
- Rate limited by GitHub API (60 requests/hour without authentication)
|
|
273
|
+
- Requires internet connection to fetch data
|
|
274
|
+
- Large PRs with extensive diffs may generate very large Markdown files
|
|
275
|
+
|
|
276
|
+
## Roadmap
|
|
277
|
+
|
|
278
|
+
- [ ] GitHub authentication support (personal access tokens)
|
|
279
|
+
- [ ] Support for GitHub Enterprise
|
|
280
|
+
- [ ] Private repository access
|
|
281
|
+
- [ ] Batch processing of multiple PRs
|
|
282
|
+
- [ ] Custom output templates
|
|
283
|
+
- [ ] Additional output formats (HTML, PDF)
|
|
284
|
+
- [ ] Diff filtering and summarization
|
|
285
|
+
- [ ] PR comparison tool
|
|
286
|
+
- [ ] Integration with CI/CD pipelines
|
|
287
|
+
|
|
288
|
+
## Contributing
|
|
289
|
+
|
|
290
|
+
This project is maintained by tboy1337. Contributions, issues, and feature requests are welcome! Feel free to check the [issues page](https://github.com/tboy1337/PR2MD/issues).
|
|
291
|
+
|
|
292
|
+
## License
|
|
293
|
+
|
|
294
|
+
This project is licensed under the **Commercial Restricted License (CRL) Version 1.1**.
|
|
295
|
+
|
|
296
|
+
**Summary:**
|
|
297
|
+
- ✅ **Free for non-commercial use** (personal, educational, research, open source)
|
|
298
|
+
- ❌ **Commercial use requires a separate commercial license**
|
|
299
|
+
- 📧 Contact the copyright holder for commercial licensing inquiries
|
|
300
|
+
|
|
301
|
+
See the [LICENSE.md](LICENSE.md) file for the complete license text.
|
|
302
|
+
|
|
303
|
+
## Author
|
|
304
|
+
|
|
305
|
+
**tboy1337**
|
|
306
|
+
- GitHub: [@tboy1337](https://github.com/tboy1337)
|
|
307
|
+
|
|
308
|
+
## Acknowledgments
|
|
309
|
+
|
|
310
|
+
- Built with Python 3.13+
|
|
311
|
+
- Uses the [GitHub REST API](https://docs.github.com/en/rest)
|
|
312
|
+
- Inspired by the need for better PR documentation tools
|
|
313
|
+
|
|
314
|
+
---
|
|
315
|
+
|
|
316
|
+
**Made with ❤️ for the developer community**
|
|
317
|
+
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
pr2md/__init__.py,sha256=VFRYcFv1Y_5bsSnO_evEQPKYHtawYPAvp4P7-jPtY1E,547
|
|
2
|
+
pr2md/__main__.py,sha256=v1ZnD7wEzDbUi1jOplNEezAE2lGqs8hazSR-fp8-va0,122
|
|
3
|
+
pr2md/cli.py,sha256=Gwws5MXg7BUEI7z8ncADQwhaVweM3mmB2gn-EcUpFfE,6725
|
|
4
|
+
pr2md/formatter.py,sha256=4L6P7udNlYeEQcIU59uNYeG-DIWp5nx67YCyZ9b1UnE,8122
|
|
5
|
+
pr2md/models.py,sha256=L67GK-G48YcjadysFJ0u_AMV62PHBqp-D_VN4RROAvU,6739
|
|
6
|
+
pr2md/pr_extractor.py,sha256=5aDPCRu_AVdDZ9_jOX5l7qkcmnSh0gzoKIUCca9ro3Y,6328
|
|
7
|
+
pr2md/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
+
pr2md-1.0.1.dist-info/licenses/LICENSE.md,sha256=NYP65MjIOiBIzSEUIwUQw56lJYkP_7nGPFfgIgyl-iI,3035
|
|
9
|
+
pr2md-1.0.1.dist-info/METADATA,sha256=QKE-I8tVh-5IuzzvusoyKjl8Lg9wdpBvRxT_3-diGHM,9500
|
|
10
|
+
pr2md-1.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
11
|
+
pr2md-1.0.1.dist-info/entry_points.txt,sha256=r-BbhudThY0lrrEhlT3iq6GzEMyVzh-fBPOUZl3Z-yQ,46
|
|
12
|
+
pr2md-1.0.1.dist-info/top_level.txt,sha256=smkBu5Jubu9z2ItjJY8ekPudzTnXyoe74EXLHHtGGwc,6
|
|
13
|
+
pr2md-1.0.1.dist-info/RECORD,,
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# Commercial Restricted License (CRL)
|
|
2
|
+
|
|
3
|
+
**Version 1.1**
|
|
4
|
+
|
|
5
|
+
**Copyright (c) 2025 tboy1337**
|
|
6
|
+
|
|
7
|
+
## Grant of Rights
|
|
8
|
+
|
|
9
|
+
Subject to the terms and conditions of this license, the copyright holder grants you a worldwide, royalty-free, non-exclusive license to use, copy, modify, and distribute this software and associated documentation files (the "Software") for **Non-Commercial Use** only.
|
|
10
|
+
|
|
11
|
+
## Definitions
|
|
12
|
+
|
|
13
|
+
**"Non-Commercial Use"** means use of the Software that is not primarily intended for or directed toward commercial advantage or monetary compensation. Non-Commercial Use includes:
|
|
14
|
+
|
|
15
|
+
- Personal use, learning, and experimentation
|
|
16
|
+
- Academic research and education
|
|
17
|
+
- Open source projects that are not monetized
|
|
18
|
+
- Internal evaluation within a commercial organization (limited to 30 days)
|
|
19
|
+
- Use by registered non-profit organizations for their non-profit activities
|
|
20
|
+
|
|
21
|
+
**"Commercial Use"** means any use of the Software that is primarily intended for or directed toward commercial advantage or monetary compensation, including but not limited to:
|
|
22
|
+
|
|
23
|
+
- Use in any product or service that generates revenue
|
|
24
|
+
- Use by for-profit organizations in their business operations
|
|
25
|
+
- Integration into commercial software or services
|
|
26
|
+
- Use in providing paid consulting, support, or services
|
|
27
|
+
- Use in any business process that contributes to revenue generation
|
|
28
|
+
- Use by organizations with annual revenue exceeding $100,000 USD
|
|
29
|
+
|
|
30
|
+
**Note:** Any Commercial Use of the Software requires a separate commercial license from the copyright holder.
|
|
31
|
+
|
|
32
|
+
## Conditions
|
|
33
|
+
|
|
34
|
+
For Non-Commercial Use, you may:
|
|
35
|
+
|
|
36
|
+
- Use, copy, and modify the Software
|
|
37
|
+
- Distribute copies of the Software
|
|
38
|
+
- Distribute your modifications under this same license
|
|
39
|
+
|
|
40
|
+
You must:
|
|
41
|
+
|
|
42
|
+
- Include this license notice in all copies or substantial portions of the Software
|
|
43
|
+
- Not remove or alter any copyright notices
|
|
44
|
+
|
|
45
|
+
## Restrictions
|
|
46
|
+
|
|
47
|
+
You may not:
|
|
48
|
+
|
|
49
|
+
- Use the Software for Commercial Use without a commercial license
|
|
50
|
+
- Sublicense the Software under different terms
|
|
51
|
+
- Use the Software in any way that violates applicable laws
|
|
52
|
+
- Remove, obscure, or modify any licensing, copyright, or other legal notices
|
|
53
|
+
|
|
54
|
+
## No Warranty
|
|
55
|
+
|
|
56
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
57
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
58
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
59
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
60
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
61
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
62
|
+
SOFTWARE.
|
|
63
|
+
|
|
64
|
+
## Termination
|
|
65
|
+
|
|
66
|
+
This license terminates automatically if you violate any of its terms. Upon termination, you must cease all use and distribution of the Software and destroy all copies in your possession.
|
|
67
|
+
|
|
68
|
+
## Governing Law
|
|
69
|
+
|
|
70
|
+
This license shall be governed by and construed in accordance with the laws of the United Kingdom, without regard to its conflict of law provisions.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
pr2md
|