pr-context-engine 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pr_context_engine-0.1.0.dist-info/METADATA +211 -0
- pr_context_engine-0.1.0.dist-info/RECORD +29 -0
- pr_context_engine-0.1.0.dist-info/WHEEL +4 -0
- pr_context_engine-0.1.0.dist-info/entry_points.txt +2 -0
- pr_context_engine-0.1.0.dist-info/licenses/LICENSE +21 -0
- src/__init__.py +1 -0
- src/analyzers/__init__.py +1 -0
- src/analyzers/ast_walker.py +91 -0
- src/analyzers/diff_parser.py +158 -0
- src/analyzers/risk_scorer.py +121 -0
- src/briefing/__init__.py +5 -0
- src/briefing/generator.py +229 -0
- src/briefing/prompt_templates.py +67 -0
- src/cli.py +329 -0
- src/config.py +118 -0
- src/context/__init__.py +1 -0
- src/context/codebase_index.py +382 -0
- src/context/git_history.py +225 -0
- src/fixes/__init__.py +1 -0
- src/fixes/confidence.py +60 -0
- src/fixes/fix_generator.py +152 -0
- src/github_api/__init__.py +3 -0
- src/github_api/comment_poster.py +95 -0
- src/llm/__init__.py +106 -0
- src/llm/anthropic_provider.py +32 -0
- src/llm/base.py +11 -0
- src/llm/gemini_provider.py +33 -0
- src/llm/groq_provider.py +30 -0
- src/llm/ollama_provider.py +41 -0
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
"""Generate structured PR briefings using LLM with senior-voice prompting."""
|
|
2
|
+
import logging
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
from src.analyzers.diff_parser import FileChange
|
|
6
|
+
from src.analyzers.risk_scorer import RiskFlag
|
|
7
|
+
from src.briefing.prompt_templates import SYSTEM_PROMPT
|
|
8
|
+
from src.context.codebase_index import RelatedChunk
|
|
9
|
+
from src.context.git_history import FileHistory, RecentPR
|
|
10
|
+
from src.llm.base import LLMProvider
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class Briefing:
    """Parsed result of a single LLM briefing call.

    The four section fields hold the text extracted for the matching numbered
    section of the model's reply; ``generate_briefing`` stores an empty string
    for a section that could not be found. ``raw_response`` keeps the reply
    exactly as the provider returned it.
    """

    # Text of section "1. WHAT CHANGED".
    what_changed: str
    # Text of section "2. BLAST RADIUS".
    blast_radius: str
    # Text of section "3. RISK FLAGS".
    risk_flags: str
    # Text of section "4. QUESTIONS".
    questions: str
    # Unparsed provider output, kept alongside the parsed sections.
    raw_response: str
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def generate_briefing(
    provider: LLMProvider,
    changes: list[FileChange],
    changed_symbols: dict[str, list[str]],
    flags: list[RiskFlag],
    related_code: dict[str, list[RelatedChunk]] | None = None,
    git_history: dict[str, FileHistory] | None = None,
    recent_prs: list[RecentPR] | None = None,
) -> Briefing:
    """Ask the LLM for a briefing and split its reply into sections.

    Args:
        provider: LLM provider whose ``generate(prompt)`` is called once.
        changes: Parsed file changes from the diff.
        changed_symbols: File path -> names of changed functions/classes.
        flags: Risk flags produced by the heuristic analysis.
        related_code: Optional file path -> related chunks from the repo.
        git_history: Optional file path -> recent commit history.
        recent_prs: Optional recently merged PRs touching the same files.

    Returns:
        A ``Briefing`` holding the four parsed sections plus the raw reply.
        Sections absent from the reply are empty strings.

    Raises:
        RuntimeError: If ``provider.generate`` raises; the original exception
            is chained as the cause.
    """
    prompt = _assemble_prompt(changes, changed_symbols, flags, related_code, git_history, recent_prs)
    logger.info("Assembled prompt (%d chars)", len(prompt))

    # Any provider failure is surfaced to callers as a RuntimeError.
    try:
        raw_response = provider.generate(prompt)
    except Exception as exc:
        raise RuntimeError(f"LLM provider failed to generate briefing: {exc}") from exc

    logger.info("Generated briefing (%d chars)", len(raw_response))

    # The reply is expected to follow the numbered-header layout requested by
    # the system prompt; anything the parser cannot find defaults to "".
    parsed = _parse_sections(raw_response)
    section_names = ("what_changed", "blast_radius", "risk_flags", "questions")
    section_values = {name: parsed.get(name, "") for name in section_names}

    briefing = Briefing(raw_response=raw_response, **section_values)
    _validate_briefing(briefing)
    return briefing
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _assemble_prompt(
    changes: list[FileChange],
    changed_symbols: dict[str, list[str]],
    flags: list[RiskFlag],
    related_code: dict[str, list[RelatedChunk]] | None = None,
    git_history: dict[str, FileHistory] | None = None,
    recent_prs: list[RecentPR] | None = None,
) -> str:
    """Build the prompt text: the system prompt followed by a CONTEXT block.

    The context always contains the changed-file list and the risk flags.
    The git-history, recent-PR and related-code sections are emitted only when
    the corresponding argument is non-empty.
    """
    out: list[str] = [SYSTEM_PROMPT, "\n---\n## CONTEXT\n", "### Changed files\n"]

    for change in changes:
        action = (
            "new file" if change.is_new_file
            else "deleted" if change.is_deleted_file
            else "modified"
        )
        names = changed_symbols.get(change.path, [])
        suffix = f" — symbols: {', '.join(names)}" if names else ""
        out.append(
            f"- `{change.path}` ({change.language}, {action})"
            f" +{len(change.added_lines)}/-{len(change.removed_lines)} lines{suffix}\n"
        )

    out.append("\n### Risk flags\n")
    if not flags:
        out.append("None detected.\n")
    else:
        for flag in flags:
            # The ":<line>" suffix is present only for flags tied to a specific line.
            where = "" if flag.line is None else f":{flag.line}"
            out.append(f"- [{flag.flag}] `{flag.file}{where}` — {flag.snippet}\n")

    if git_history:
        out.append("\n### Recent activity on touched files\n")
        for path, history in git_history.items():
            commits = history.recent_commits
            shallow = history.limited_history
            if not (commits or shallow):
                continue  # nothing to report for this file
            out.append(f"\n**`{path}`**")
            if not commits:
                # Reaching here without commits means the history was limited.
                out.append(" — limited history (shallow clone)\n")
                continue
            if shallow:
                out.append(" (history may be truncated — shallow clone)")
            out.append("\n")
            out.extend(f"- `{commit.sha}` {commit.message}\n" for commit in commits)

    if recent_prs:
        out.append("\n### Recent merged PRs touching these files\n")
        for merged in recent_prs:
            summary = f" — {merged.body_first_line}" if merged.body_first_line else ""
            out.append(f"- PR #{merged.number}: \"{merged.title}\"{summary}\n")

    if related_code:
        out.append("\n### Related code (semantically similar, from elsewhere in repo)\n")
        for path, chunks in related_code.items():
            if not chunks:
                continue
            out.append(f"\n**Related to `{path}`:**\n")
            # At most three chunks per file, each capped at 400 characters,
            # to keep the prompt under the token budget.
            for chunk in chunks[:3]:
                text = chunk.chunk_text
                snippet = text if len(text) <= 400 else text[:400] + "\n..."
                out.append(f"- `{chunk.file_path}` ({chunk.label}):\n```\n{snippet}\n```\n")

    return "".join(out)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _parse_sections(response: str) -> dict[str, str]:
|
|
153
|
+
"""Parse LLM response into section keys.
|
|
154
|
+
|
|
155
|
+
The LLM is prompted to structure output as:
|
|
156
|
+
1. WHAT CHANGED
|
|
157
|
+
2. BLAST RADIUS
|
|
158
|
+
3. RISK FLAGS
|
|
159
|
+
4. QUESTIONS
|
|
160
|
+
|
|
161
|
+
This parser extracts content for each numbered section. If a section is not
|
|
162
|
+
found, it returns an empty string for that section.
|
|
163
|
+
"""
|
|
164
|
+
sections: dict[str, str] = {
|
|
165
|
+
"what_changed": "",
|
|
166
|
+
"blast_radius": "",
|
|
167
|
+
"risk_flags": "",
|
|
168
|
+
"questions": "",
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
# Check for section headers in order of specificity (longest first)
|
|
172
|
+
# Only use full section names to avoid matching content lines like "1. Why?"
|
|
173
|
+
section_headers = [
|
|
174
|
+
("1. WHAT CHANGED", "what_changed"),
|
|
175
|
+
("2. BLAST RADIUS", "blast_radius"),
|
|
176
|
+
("3. RISK FLAGS", "risk_flags"),
|
|
177
|
+
("4. QUESTIONS", "questions"),
|
|
178
|
+
]
|
|
179
|
+
|
|
180
|
+
lines = response.split("\n")
|
|
181
|
+
current_section = None
|
|
182
|
+
current_content: list[str] = []
|
|
183
|
+
found_sections = set()
|
|
184
|
+
|
|
185
|
+
for line in lines:
|
|
186
|
+
# Check if this line starts a new section (full header match only)
|
|
187
|
+
section_found = False
|
|
188
|
+
stripped = line.strip()
|
|
189
|
+
for section_header, section_key in section_headers:
|
|
190
|
+
if stripped.startswith(section_header):
|
|
191
|
+
# Save the previous section content
|
|
192
|
+
if current_section:
|
|
193
|
+
sections[current_section] = "\n".join(current_content).strip()
|
|
194
|
+
current_section = section_key
|
|
195
|
+
found_sections.add(section_key)
|
|
196
|
+
current_content = []
|
|
197
|
+
section_found = True
|
|
198
|
+
break
|
|
199
|
+
|
|
200
|
+
# If this line doesn't start a new section and we're in a section, collect the content
|
|
201
|
+
if not section_found and current_section is not None:
|
|
202
|
+
current_content.append(line)
|
|
203
|
+
|
|
204
|
+
# Save the last section
|
|
205
|
+
if current_section:
|
|
206
|
+
sections[current_section] = "\n".join(current_content).strip()
|
|
207
|
+
|
|
208
|
+
# Log which sections were found for debugging
|
|
209
|
+
missing = set(sections.keys()) - found_sections
|
|
210
|
+
if missing:
|
|
211
|
+
logger.warning("Missing expected sections in LLM response: %s", ", ".join(sorted(missing)))
|
|
212
|
+
|
|
213
|
+
return sections
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def _validate_briefing(briefing: Briefing) -> None:
    """Warn about briefing sections that contain no text.

    Nothing is raised or modified: each of the four sections that is empty or
    whitespace-only produces one warning, which suggests the model ignored the
    requested layout or the parser failed to locate the section.
    """
    for section_name in ("what_changed", "blast_radius", "risk_flags", "questions"):
        if getattr(briefing, section_name).strip():
            continue
        # The message is pre-formatted so the emitted log record text is
        # exactly the same as a literal per-section message.
        logger.warning(f"Briefing section '{section_name}' is empty")
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""System prompt and prompt-assembly utilities for generating senior-voice PR briefings.
|
|
2
|
+
|
|
3
|
+
FIX_SYSTEM_PROMPT is used exclusively by the fix generator for per-flag LLM calls.
|
|
4
|
+
It is never merged into SYSTEM_PROMPT so the briefing behaviour is unchanged when
|
|
5
|
+
ENABLE_FIXES=false.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
SYSTEM_PROMPT = """You are a senior backend engineer reviewing a pull request. You have 90 seconds.
|
|
9
|
+
Your job is to brief the human reviewer so they can review effectively.
|
|
10
|
+
|
|
11
|
+
You will receive:
|
|
12
|
+
- A list of changed files with parsed function/class names
|
|
13
|
+
- Risk flags detected by static heuristics
|
|
14
|
+
- Recent commit history on touched files
|
|
15
|
+
- Semantically related code from elsewhere in the repo
|
|
16
|
+
|
|
17
|
+
Produce a briefing with exactly four sections:
|
|
18
|
+
|
|
19
|
+
1. WHAT CHANGED — 2-3 sentences. Describe the *intent* of the change, not the
|
|
20
|
+
lines. Do not list files. If you can't tell the intent, say so.
|
|
21
|
+
|
|
22
|
+
2. BLAST RADIUS — Which callers, services, contracts, or data could break?
|
|
23
|
+
Be specific. If the change is internal and self-contained, write "Self-contained."
|
|
24
|
+
|
|
25
|
+
3. RISK FLAGS — Bullet list. Only include flags that are actually present.
|
|
26
|
+
If none, write "None."
|
|
27
|
+
|
|
28
|
+
4. QUESTIONS — Exactly three questions a senior reviewer would ask before
|
|
29
|
+
approving. Questions must be answerable and specific. Bad question:
|
|
30
|
+
"Did you test this?" Good question: "The new retry loop in fetch_user
|
|
31
|
+
has no backoff — is that intentional given this is called per-request?"
|
|
32
|
+
|
|
33
|
+
Rules:
|
|
34
|
+
- Be terse. Aim for under 200 words total.
|
|
35
|
+
- No praise. No "this looks good." No emojis except the section icons.
|
|
36
|
+
- If the PR is trivial (typo fix, doc change), say so in one line and skip
|
|
37
|
+
the other sections.
|
|
38
|
+
- Never speculate about things you can't see. If you don't have the context,
|
|
39
|
+
say "Cannot tell from diff."
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
FIX_SYSTEM_PROMPT = """You are a senior backend engineer. A static-analysis heuristic has flagged \
|
|
43
|
+
a specific line in a pull request. Your job is to suggest a minimal, correct fix — or decline \
|
|
44
|
+
if you are not confident.
|
|
45
|
+
|
|
46
|
+
You will receive:
|
|
47
|
+
- The flag type and its location (file and line number)
|
|
48
|
+
- The flagged code snippet
|
|
49
|
+
- The surrounding diff context (lines prefixed with +, -, or space)
|
|
50
|
+
|
|
51
|
+
Respond in this EXACT format with no extra text before or after:
|
|
52
|
+
|
|
53
|
+
CONFIDENCE: <high|medium|low>
|
|
54
|
+
RATIONALE: <one sentence explaining the problem and what the fix addresses>
|
|
55
|
+
PATCH:
|
|
56
|
+
<the replacement code for the flagged line(s), or NO_PATCH if confidence is low>
|
|
57
|
+
|
|
58
|
+
Rules:
|
|
59
|
+
- CONFIDENCE must be high, medium, or low — no other values.
|
|
60
|
+
- If you are not confident the patch is correct and complete, label it low.
|
|
61
|
+
- A wrong fix is worse than no fix. When in doubt, choose low.
|
|
62
|
+
- PATCH must be a drop-in replacement for the flagged line(s) only.
|
|
63
|
+
Do not include surrounding context lines in the patch.
|
|
64
|
+
- If CONFIDENCE is low, write NO_PATCH on the PATCH line.
|
|
65
|
+
- The PATCH must be syntactically valid for the file's language.
|
|
66
|
+
- Never produce a suggestion block for vague observations; only for concrete, specific problems.
|
|
67
|
+
"""
|
src/cli.py
ADDED
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
"""Typer CLI — the single entrypoint; orchestrates fetch-diff, analyze, summarize, post."""
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
import requests
|
|
6
|
+
import typer
|
|
7
|
+
from dotenv import load_dotenv
|
|
8
|
+
|
|
9
|
+
from src.analyzers.ast_walker import extract_changed_symbols
|
|
10
|
+
from src.analyzers.diff_parser import FileChange, parse_diff
|
|
11
|
+
from src.analyzers.risk_scorer import score
|
|
12
|
+
from src.briefing.generator import Briefing, generate_briefing
|
|
13
|
+
from src.config import get_failover_provider, is_fixes_enabled
|
|
14
|
+
from src.context.codebase_index import CodebaseIndex, RelatedChunk
|
|
15
|
+
from src.context.git_history import FileHistory, RecentPR, get_file_histories, get_recent_merged_prs
|
|
16
|
+
from src.fixes.fix_generator import generate_fixes
|
|
17
|
+
from src.github_api.comment_poster import fetch_pr_diff, format_fix_section, post_pr_comment
|
|
18
|
+
|
|
19
|
+
# Runs at import time, before any command reads environment variables
# (python-dotenv; presumably picks up a local .env file — verify search path).
load_dotenv()
logger = logging.getLogger(__name__)

# Root Typer application; the callback and commands below register on it.
app = typer.Typer(help="PR Context Engine — brief a pull request.")

# Budget of added + removed lines that `review` passes on to analysis. Files
# are kept whole; once the running budget is used up the remaining files are dropped.
_MAX_DIFF_LINES = 4_000  # ~8k tokens; avoids hitting provider context limits on large PRs
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@app.callback()
def main() -> None:
    """PR Context Engine — brief a pull request for human reviewers.

    This callback exists so Typer keeps `review` as an explicit subcommand. A
    single-command Typer app otherwise collapses the command and drops its name,
    which would break the documented `pr-context-engine review ...` invocation.
    """
    # Configure logging at INFO so the logger.info progress messages emitted
    # by the commands are visible.
    logging.basicConfig(level=logging.INFO)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@app.command()
def review(
    pr: int = typer.Option(..., "--pr", help="Pull request number."),
    repo: str = typer.Option(..., "--repo", help="Repository in owner/name form."),
    github_token: str | None = typer.Option(
        None, envvar="GITHUB_TOKEN", help="GitHub token with pull-requests:write."
    ),
    enable_fixes: bool = typer.Option(
        False,
        "--enable-fixes/--no-enable-fixes",
        envvar="ENABLE_FIXES",
        help="Generate confidence-gated fix suggestions (opt-in, default off).",
    ),
    dry_run: bool = typer.Option(
        False,
        "--dry-run",
        help="Print the briefing to stdout instead of posting it to GitHub.",
    ),
) -> None:
    """Fetch a PR's diff, analyze it structurally, and post an AI-generated briefing."""
    # Pipeline: fetch diff -> parse -> trim to the line budget -> extract
    # symbols and risk flags -> gather repo/git context -> LLM briefing ->
    # optional fix suggestions -> print (dry run) or post as a PR comment.
    # On briefing failure an error notice is printed/posted and the command
    # exits with code 1.
    if not (github_token or dry_run):
        raise typer.BadParameter("GITHUB_TOKEN is not set (flag or env var).")

    # Typer's envvar= already covers ENABLE_FIXES for CLI invocations; the
    # fallback is for programmatic callers that bypass Typer's parsing.
    if not enable_fixes:
        enable_fixes = is_fixes_enabled()

    raw_diff = fetch_pr_diff(repo, pr, github_token)
    logger.info("Fetched diff (%d chars)", len(raw_diff))

    parsed_changes = parse_diff(raw_diff)
    logger.info("Parsed %d file changes", len(parsed_changes))

    # Keep whole files until the running line budget is used up; files are
    # never cut in half, so downstream analysis always sees complete hunks.
    remaining = _MAX_DIFF_LINES
    changes: list[FileChange] = []
    for candidate in parsed_changes:
        if remaining <= 0:
            break
        changes.append(candidate)
        remaining -= len(candidate.added_lines) + len(candidate.removed_lines)
    dropped = len(parsed_changes) - len(changes)
    if dropped > 0:
        logger.warning("Dropped %d files beyond %d-line budget", dropped, _MAX_DIFF_LINES)

    # Only files with at least one extracted symbol get an entry.
    changed_symbols: dict[str, list[str]] = {
        change.path: symbols
        for change in changes
        if (symbols := extract_changed_symbols(change))
    }

    flags = score(changes)
    logger.info("Detected %d risk flags", len(flags))

    related_code = _build_related_code(changes, changed_symbols)
    git_history, recent_prs = _build_git_context(changes, repo, github_token)

    try:
        provider = get_failover_provider()
    except (RuntimeError, ValueError) as exc:
        raise typer.BadParameter(str(exc)) from exc

    try:
        briefing = generate_briefing(
            provider, changes, changed_symbols, flags, related_code, git_history, recent_prs
        )
    except RuntimeError as exc:
        logger.error("Briefing generation failed: %s", exc)
        reason = str(exc)
        if dry_run:
            typer.echo(_format_error(reason))
        elif github_token:
            post_pr_comment(repo, pr, _format_error(reason), github_token)
        raise typer.Exit(code=1)

    logger.info("Generated briefing sections (via %s)", provider.attribution())

    fix_section = ""
    if enable_fixes:
        logger.info("Fix suggestions enabled — generating for eligible flags")
        suggestions, extra_count = generate_fixes(provider, flags, changes)
        logger.info(
            "Generated %d fix suggestion(s) (%d skipped by cap)",
            len(suggestions),
            extra_count,
        )
        fix_section = format_fix_section(suggestions, extra_count)

    comment_text = _format_briefing(
        briefing,
        provider_attribution=provider.attribution(),
        fix_section=fix_section,
    )

    if dry_run:
        typer.echo(comment_text)
        logger.info("Dry-run mode — briefing printed to stdout, not posted to GitHub")
        return

    # Posting requires a token; the check at the top normally guarantees it.
    if not github_token:
        raise RuntimeError("github_token must be set when not in dry-run mode")
    post_pr_comment(repo, pr, comment_text, github_token)
    logger.info("Comment posted to %s PR #%d", repo, pr)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _build_related_code(
    changes: list[FileChange],
    changed_symbols: dict[str, list[str]],
) -> dict[str, list[RelatedChunk]]:
    """Look up related code in the codebase index for every changed file.

    The index is built (or updated) for the current directory. If that fails
    for any reason (e.g. the sqlite-vec extension is unavailable on this
    platform) a warning is logged and an empty dict is returned, so the
    briefing can still be produced without RAG context.

    Returns:
        Changed file path -> chunks returned by the index. Files with no hits
        are omitted, and the changed files themselves are excluded from results.
    """
    try:
        index = CodebaseIndex(repo_root=".")
        index.build_or_update()
    except Exception as exc:
        logger.warning("Codebase index unavailable — skipping related-code context: %s", exc)
        return {}

    changed_paths = {change.path for change in changes}
    return {
        change.path: hits
        for change in changes
        if (
            hits := index.query(
                _file_change_query(change, changed_symbols.get(change.path, [])),
                exclude_paths=changed_paths,
                top_k=5,
            )
        )
    }
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _file_change_query(change: FileChange, symbols: list[str]) -> str:
    """Compose the text used to query the index for one changed file.

    The result is newline-joined: the file path, then (if any) a
    ``functions: a, b, ...`` line with up to 10 symbols, then (if any) the
    first 20 added lines.
    """
    symbol_part = ["functions: " + ", ".join(symbols[:10])] if symbols else []
    added_part = ["\n".join(change.added_lines[:20])] if change.added_lines else []
    return "\n".join([change.path, *symbol_part, *added_part])
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _build_git_context(
    changes: list[FileChange],
    repo: str,
    github_token: str | None,
) -> tuple[dict[str, FileHistory], list[RecentPR]]:
    """Collect commit history and recently merged PRs for the changed files.

    Both lookups are best-effort: a failure is logged as a warning and the
    corresponding result stays empty, so the briefing still works without
    history context. The merged-PR lookup is skipped when no GitHub token is
    available.

    Returns:
        ``(git_history, recent_prs)``.
    """
    paths = [change.path for change in changes]

    try:
        git_history = get_file_histories(paths, repo_root=".")
        logger.info("Fetched git history for %d files", len(git_history))
    except Exception as exc:
        logger.warning("Git history unavailable: %s", exc)
        git_history = {}

    recent_prs: list[RecentPR] = []
    if not github_token:
        return git_history, recent_prs

    try:
        recent_prs = get_recent_merged_prs(paths, repo, github_token, repo_root=".")
        logger.info("Found %d recent merged PRs", len(recent_prs))
    except Exception as exc:
        logger.warning("Recent PR lookup failed: %s", exc)

    return git_history, recent_prs
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def _format_briefing(
    briefing: Briefing,
    provider_attribution: str | None = None,
    fix_section: str = "",
) -> str:
    """Render a briefing as the markdown body of a GitHub PR comment.

    Layout: title, the four sections under bold headings, a horizontal rule,
    an optional fix-suggestion section followed by its own rule (only when
    ``fix_section`` is non-empty), and a footer crediting the tool and, when
    given, the provider.
    """
    headed_sections = (
        ("**What changed**\n", briefing.what_changed),
        ("\n\n**Blast radius**\n", briefing.blast_radius),
        ("\n\n**Risk flags**\n", briefing.risk_flags),
        ("\n\n**Questions for the reviewer**\n", briefing.questions),
    )

    pieces: list[str] = ["## 🤖 PR Briefing\n"]
    for heading, text in headed_sections:
        pieces += [heading, text]
    pieces.append("\n\n---\n")

    if fix_section:
        pieces += [fix_section, "\n---\n"]

    via = f" via {provider_attribution}" if provider_attribution else ""
    pieces.append(
        f"\n<sub>Generated by [PR Context Engine](https://github.com/paramahastha/pr-context-engine){via}. "
        "Not a substitute for human review.</sub>"
    )
    return "".join(pieces)
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def _format_error(error: str) -> str:
|
|
247
|
+
"""Format a briefing-failure notice as a GitHub PR comment."""
|
|
248
|
+
return (
|
|
249
|
+
"## 🤖 PR Briefing\n\n"
|
|
250
|
+
f"**Briefing failed:** {error}\n\n"
|
|
251
|
+
"Check your API keys and rate limits.\n\n"
|
|
252
|
+
"---\n"
|
|
253
|
+
"\n<sub>Generated by [PR Context Engine](https://github.com/paramahastha/pr-context-engine). "
|
|
254
|
+
"Not a substitute for human review.</sub>"
|
|
255
|
+
)
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
@app.command()
def quickstart() -> None:
    """Check environment setup and print exactly what is missing before first use."""
    # The docstring above is left verbatim: Typer uses a command's docstring
    # as its help text.
    #
    # Checks, in order: provider API keys (skipped for LLM_PROVIDER=ollama),
    # presence of GITHUB_TOKEN, and — when a token is set — a live call to the
    # GitHub API to confirm the token authenticates and has a usable scope.
    # Exits with code 1 if any required check fails.
    ok = True

    def check(name: str, present: bool, hint: str, *, required: bool = True) -> None:
        """Print one status line; a missing *required* item fails the run."""
        nonlocal ok
        if present:
            typer.echo(f" [ok] {name}")
            return
        typer.echo(f" [!!] {name} — {hint}")
        if required:
            ok = False

    provider = os.environ.get("LLM_PROVIDER", "groq")
    typer.echo("\nChecking provider keys...")
    if provider == "ollama":
        typer.echo(" [ok] LLM_PROVIDER=ollama — no API key required")
    else:
        groq_key = bool(os.environ.get("GROQ_API_KEY"))
        gemini_key = bool(os.environ.get("GEMINI_API_KEY"))
        anthropic_key = bool(os.environ.get("ANTHROPIC_API_KEY"))
        any_key = groq_key or gemini_key or anthropic_key

        # Only "no provider key at all" is a hard failure: the messages below
        # describe Gemini as optional and require just one key. An individual
        # missing key is still reported, but no longer forces exit code 1
        # when another key is available.
        # NOTE(review): assumes the failover provider can run on whichever key
        # is present — confirm against get_failover_provider().
        check(
            "GROQ_API_KEY (default provider)",
            groq_key,
            "get a free key at https://console.groq.com/keys",
            required=not any_key,
        )
        check(
            "GEMINI_API_KEY (failover)",
            gemini_key,
            "optional but recommended — https://aistudio.google.com/apikey",
            required=not any_key,
        )
        if anthropic_key:
            # Counted toward any_key, so show it rather than leave the user
            # with two warnings and no visible reason for passing.
            typer.echo(" [ok] ANTHROPIC_API_KEY")
        if not any_key:
            typer.echo("\n At least one provider key is required. GROQ_API_KEY is the easiest free option.")

    typer.echo("\nChecking GitHub token...")
    gh_token = os.environ.get("GITHUB_TOKEN", "")
    check("GITHUB_TOKEN", bool(gh_token), "set via `export GITHUB_TOKEN=$(gh auth token)` or pass --github-token")

    if gh_token:
        typer.echo("\nVerifying GitHub token scope...")
        try:
            resp = requests.get(
                "https://api.github.com/user",
                headers={"Authorization": f"Bearer {gh_token}", "X-GitHub-Api-Version": "2022-11-28"},
                timeout=8,
            )
            if resp.status_code == 200:
                login = resp.json().get("login", "unknown")
                typer.echo(f" [ok] Authenticated as {login}")
                scopes = resp.headers.get("X-OAuth-Scopes", "")
                if not scopes:
                    # Fine-grained PATs don't expose X-OAuth-Scopes; assume correct permissions.
                    typer.echo(" [ok] Fine-grained PAT detected — scope check skipped")
                else:
                    has_repo = any(s.strip() in ("repo", "public_repo") for s in scopes.split(","))
                    check(
                        "Token scope (repo or public_repo)",
                        has_repo,
                        "the token needs pull-requests:write; regenerate with repo scope",
                    )
            else:
                typer.echo(f" [!!] Token check failed (HTTP {resp.status_code}) — token may be invalid")
                ok = False
        except Exception as exc:
            # Deliberately broad: this is a diagnostic command, so any network
            # or decoding problem is reported instead of crashing.
            typer.echo(f" [!!] Could not reach GitHub API: {exc}")
            ok = False

    typer.echo("")
    if ok:
        typer.echo("All checks passed. Run a dry-run to see a briefing before granting write access:")
        typer.echo(" pr-context-engine review --pr <N> --repo <owner/name> --dry-run")
    else:
        typer.echo("Fix the issues above, then re-run `pr-context-engine quickstart`.")
        raise typer.Exit(code=1)
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
# Run the Typer app when this module is executed directly as a script.
if __name__ == "__main__":
    app()
|