pr-context-engine 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pr_context_engine-0.1.0.dist-info/METADATA +211 -0
- pr_context_engine-0.1.0.dist-info/RECORD +29 -0
- pr_context_engine-0.1.0.dist-info/WHEEL +4 -0
- pr_context_engine-0.1.0.dist-info/entry_points.txt +2 -0
- pr_context_engine-0.1.0.dist-info/licenses/LICENSE +21 -0
- src/__init__.py +1 -0
- src/analyzers/__init__.py +1 -0
- src/analyzers/ast_walker.py +91 -0
- src/analyzers/diff_parser.py +158 -0
- src/analyzers/risk_scorer.py +121 -0
- src/briefing/__init__.py +5 -0
- src/briefing/generator.py +229 -0
- src/briefing/prompt_templates.py +67 -0
- src/cli.py +329 -0
- src/config.py +118 -0
- src/context/__init__.py +1 -0
- src/context/codebase_index.py +382 -0
- src/context/git_history.py +225 -0
- src/fixes/__init__.py +1 -0
- src/fixes/confidence.py +60 -0
- src/fixes/fix_generator.py +152 -0
- src/github_api/__init__.py +3 -0
- src/github_api/comment_poster.py +95 -0
- src/llm/__init__.py +106 -0
- src/llm/anthropic_provider.py +32 -0
- src/llm/base.py +11 -0
- src/llm/gemini_provider.py +33 -0
- src/llm/groq_provider.py +30 -0
- src/llm/ollama_provider.py +41 -0
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pr-context-engine
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: An AI tool that reads every PR and posts a senior-engineer-style briefing.
|
|
5
|
+
Project-URL: Homepage, https://github.com/paramahastha/pr-context-engine
|
|
6
|
+
Project-URL: Repository, https://github.com/paramahastha/pr-context-engine
|
|
7
|
+
Project-URL: Issues, https://github.com/paramahastha/pr-context-engine/issues
|
|
8
|
+
Project-URL: Changelog, https://github.com/paramahastha/pr-context-engine/blob/main/CHANGELOG.md
|
|
9
|
+
Author-email: Kautsar <paramahastha@gmail.com>
|
|
10
|
+
License: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: ai,code-review,github,llm,pull-request
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Environment :: Console
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Software Development :: Version Control :: Git
|
|
19
|
+
Requires-Python: >=3.12
|
|
20
|
+
Requires-Dist: anthropic>=0.40
|
|
21
|
+
Requires-Dist: fastembed>=0.4
|
|
22
|
+
Requires-Dist: google-genai>=1.0
|
|
23
|
+
Requires-Dist: groq>=0.13
|
|
24
|
+
Requires-Dist: pygithub>=2.4
|
|
25
|
+
Requires-Dist: python-dotenv>=1.0
|
|
26
|
+
Requires-Dist: requests>=2.32
|
|
27
|
+
Requires-Dist: sqlite-vec>=0.1
|
|
28
|
+
Requires-Dist: typer>=0.12
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
|
|
31
|
+
# PR Context Engine
|
|
32
|
+
|
|
33
|
+
[CI status](https://github.com/paramahastha/pr-context-engine/actions/workflows/pr-review.yml)
|
|
34
|
+
[PyPI package](https://pypi.org/project/pr-context-engine/)
|
|
35
|
+
[License: MIT](LICENSE)
|
|
36
|
+
[Python 3.12+](https://www.python.org/downloads/)
|
|
37
|
+
|
|
38
|
+
> An AI tool that reads every PR and writes the briefing — and the fixes — a senior engineer would, with the calibration data to prove it's not just guessing.
|
|
39
|
+
|
|
40
|
+
## What it does
|
|
41
|
+
|
|
42
|
+
Every PR opens with three problems for the reviewer: _what is this actually doing_, _what could it break_, and _what should I push back on_. A diff doesn't answer any of those.
|
|
43
|
+
|
|
44
|
+
PR Context Engine reads the diff plus surrounding code, recent git history, and semantically similar code from elsewhere in the repo, then posts a terse briefing written like a senior backend engineer would write it:
|
|
45
|
+
|
|
46
|
+
```markdown
|
|
47
|
+
## PR Briefing
|
|
48
|
+
|
|
49
|
+
**What changed**
|
|
50
|
+
Refactors the session token storage from an in-memory dict to Redis, adding a
|
|
51
|
+
configurable TTL. The auth middleware is updated to hit Redis on every request.
|
|
52
|
+
|
|
53
|
+
**Blast radius**
|
|
54
|
+
Any caller of `get_session()` now depends on Redis being reachable. If Redis is
|
|
55
|
+
down, all authenticated requests will 401. The previous in-memory store had no
|
|
56
|
+
such single point of failure.
|
|
57
|
+
|
|
58
|
+
**Risk flags**
|
|
59
|
+
- `modifies_auth`: src/auth/session.py line 42 — `token = generate_token(user_id)`
|
|
60
|
+
|
|
61
|
+
**Questions for the reviewer**
|
|
62
|
+
|
|
63
|
+
1. The Redis client is initialised once at import time — is there a reconnect
|
|
64
|
+
strategy if the connection drops mid-deploy?
|
|
65
|
+
2. `SESSION_TTL` defaults to 3600 but the old in-memory store had no TTL — have
|
|
66
|
+
existing sessions been migrated or will they all expire immediately after deploy?
|
|
67
|
+
3. There are no tests for the Redis-down path — is 401-on-outage the intended
|
|
68
|
+
degradation, or should it fall back to the old store?
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
No praise. No filler. No "this LGTM." Just the context a reviewer needs.
|
|
72
|
+
|
|
73
|
+
## Quickstart (5 minutes)
|
|
74
|
+
|
|
75
|
+
### Option A — GitHub Action (recommended)
|
|
76
|
+
|
|
77
|
+
1. Get a free [Groq API key](https://console.groq.com/keys) — no credit card.
|
|
78
|
+
2. Add it as a secret: **Settings → Secrets → Actions → New secret** → `GROQ_API_KEY`.
|
|
79
|
+
3. Enable write permissions: **Settings → Actions → General → Workflow permissions → Read and write**.
|
|
80
|
+
4. Add this to `.github/workflows/pr-briefing.yml`:
|
|
81
|
+
|
|
82
|
+
```yaml
|
|
83
|
+
name: PR Briefing
|
|
84
|
+
on:
|
|
85
|
+
pull_request:
|
|
86
|
+
types: [opened, synchronize, reopened]
|
|
87
|
+
jobs:
|
|
88
|
+
brief:
|
|
89
|
+
runs-on: ubuntu-latest
|
|
90
|
+
permissions:
|
|
91
|
+
pull-requests: write
|
|
92
|
+
contents: read
|
|
93
|
+
steps:
|
|
94
|
+
- uses: paramahastha/pr-context-engine@main
|
|
95
|
+
with:
|
|
96
|
+
groq-api-key: ${{ secrets.GROQ_API_KEY }}
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
That's it. Every new PR gets a briefing comment automatically.
|
|
100
|
+
|
|
101
|
+
### Option B — CLI (any CI or local)
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
pipx install pr-context-engine
|
|
105
|
+
export GROQ_API_KEY=<your-groq-key>
|
|
106
|
+
export GITHUB_TOKEN=$(gh auth token)
|
|
107
|
+
|
|
108
|
+
# Check your setup first
|
|
109
|
+
pr-context-engine quickstart
|
|
110
|
+
|
|
111
|
+
# Dry-run: see the briefing without posting it
|
|
112
|
+
pr-context-engine review --pr 42 --repo owner/name --dry-run
|
|
113
|
+
|
|
114
|
+
# Post the real comment
|
|
115
|
+
pr-context-engine review --pr 42 --repo owner/name
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## Switching LLM providers
|
|
119
|
+
|
|
120
|
+
Set `LLM_PROVIDER` to any of `groq` (default), `gemini`, `ollama`, or `anthropic`. Nothing downstream changes.
|
|
121
|
+
|
|
122
|
+
| Provider | Key env var | Notes |
|
|
123
|
+
|---|---|---|
|
|
124
|
+
| `groq` *(default)* | `GROQ_API_KEY` | Free, ~1 000 req/day, fast |
|
|
125
|
+
| `gemini` | `GEMINI_API_KEY` | Free-tier fallback; auto-engaged on Groq 429 |
|
|
126
|
+
| `ollama` | — | Local, offline, no rate limits |
|
|
127
|
+
| `anthropic` | `ANTHROPIC_API_KEY` | BYO key, no free tier |
|
|
128
|
+
|
|
129
|
+
**Automatic failover:** if `GEMINI_API_KEY` is set, the tool fails over to Gemini on any Groq 429 or error and notes it in the PR comment footer. See [ADR-7](docs/design-decisions.md).
|
|
130
|
+
|
|
131
|
+
## Fix suggestions (opt-in)
|
|
132
|
+
|
|
133
|
+
When `ENABLE_FIXES=true`, the tool generates confidence-gated patch suggestions for located issues. Only `high`/`medium` confidence suggestions become one-click GitHub suggestion blocks; `low` confidence produces prose notes only. Max 3 suggestions per PR.
|
|
134
|
+
|
|
135
|
+
```yaml
|
|
136
|
+
- uses: paramahastha/pr-context-engine@main
|
|
137
|
+
with:
|
|
138
|
+
groq-api-key: ${{ secrets.GROQ_API_KEY }}
|
|
139
|
+
enable-fixes: "true"
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
See [ADR-5](docs/design-decisions.md) for why this is opt-in and confidence-gated.
|
|
143
|
+
|
|
144
|
+
## Eval results
|
|
145
|
+
|
|
146
|
+
`pytest tests/eval/` produces a scorecard across five rubric dimensions (Accuracy, Blast radius, Risk flags, Question quality, Brevity) plus fix correctness and calibration rate.
|
|
147
|
+
|
|
148
|
+
```
|
|
149
|
+
pytest tests/eval/ -v
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
Results are committed to `tests/eval/scores/` so improvements are visible in git history. The headline metrics are **fix correctness rate** and **false-confidence rate** (when the model said `high` confidence, how often was the patch actually correct).
|
|
153
|
+
|
|
154
|
+
## Architecture
|
|
155
|
+
|
|
156
|
+
```
|
|
157
|
+
Front door A: Front door B:
|
|
158
|
+
GitHub Action wrapper pipx install + run in any CI / locally
|
|
159
|
+
(paramahastha/pr-context-engine@main)
|
|
160
|
+
│ │
|
|
161
|
+
└────────────┬────────────────────┘
|
|
162
|
+
▼
|
|
163
|
+
┌─────────────────────────────────────┐
|
|
164
|
+
│ CLI core (src/cli.py + orchestrator)│
|
|
165
|
+
└─────────────────────────────────────┘
|
|
166
|
+
│
|
|
167
|
+
├──► analyzers/ diff → FileChange objects, AST symbols, risk flags
|
|
168
|
+
├──► context/ git history, sqlite-vec codebase index (RAG)
|
|
169
|
+
├──► briefing/ prompt assembly → LLM call → structured output
|
|
170
|
+
├──► fixes/ confidence-gated patch suggestions (opt-in)
|
|
171
|
+
├──► llm/ pluggable providers + FailoverProvider
|
|
172
|
+
└──► github_api/ fetch diff, post comment + suggestion blocks
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
The CLI is the product; the GitHub Action is a thin wrapper. See [docs/architecture.md](docs/architecture.md) and [docs/design-decisions.md](docs/design-decisions.md).
|
|
176
|
+
|
|
177
|
+
## Data & privacy
|
|
178
|
+
|
|
179
|
+
**What leaves your machine:**
|
|
180
|
+
|
|
181
|
+
- The PR diff and parsed metadata (file paths, function names, changed lines) are sent to the active LLM provider (Groq or Gemini by default).
|
|
182
|
+
- No source code beyond the diff is sent to any external API. The codebase index (RAG) runs entirely locally via `fastembed` + `sqlite-vec`.
|
|
183
|
+
- Git history and PR metadata are fetched from the GitHub API using your `GITHUB_TOKEN`.
|
|
184
|
+
|
|
185
|
+
**Provider data policies:**
|
|
186
|
+
|
|
187
|
+
- Groq and Gemini free tiers may use inputs for model improvement. See their respective privacy policies before using on private/sensitive repos.
|
|
188
|
+
- Use `LLM_PROVIDER=ollama` or `LLM_PROVIDER=anthropic` (with `ANTHROPIC_API_KEY`) if you need a provider with stronger data-isolation guarantees.
|
|
189
|
+
- The tool has no shared backend. Your API key, your quota, your data.
|
|
190
|
+
|
|
191
|
+
## Configuration
|
|
192
|
+
|
|
193
|
+
See [CONFIG.md](CONFIG.md) for the full reference of every env var and flag.
|
|
194
|
+
|
|
195
|
+
## Design decisions
|
|
196
|
+
|
|
197
|
+
See [docs/design-decisions.md](docs/design-decisions.md) for ADRs covering: why provider abstraction is built early, why SQLite over Pinecone, why fixes are opt-in, why MIT license, and more.
|
|
198
|
+
|
|
199
|
+
## Cost
|
|
200
|
+
|
|
201
|
+
**$0/month** for a portfolio-scale project on public repos.
|
|
202
|
+
|
|
203
|
+
- GitHub Actions: free for public repos.
|
|
204
|
+
- Groq: free tier, ~1 000 req/day.
|
|
205
|
+
- Gemini fallback: free tier (~1 500 req/day).
|
|
206
|
+
- Local embeddings (`fastembed`): $0, no API.
|
|
207
|
+
- The tool has no shared backend — your usage costs stay yours regardless of how many repos adopt it.
|
|
208
|
+
|
|
209
|
+
## Contributing
|
|
210
|
+
|
|
211
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md). Bug reports and feature requests go in [Issues](https://github.com/paramahastha/pr-context-engine/issues).
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
src/__init__.py,sha256=9U8spJ1Nik8O_SIaxHfvCziQNnDznBLkFLTmwh6PTWo,86
|
|
2
|
+
src/cli.py,sha256=7N38jtvZaUf0-By3InV5amvJu_V3EBVz8cOTbWiPBdk,12485
|
|
3
|
+
src/config.py,sha256=mtKU-yQ08yW97Q0q5vm1WKzcmP_HuygI1_Z6Xlh3mmw,4330
|
|
4
|
+
src/analyzers/__init__.py,sha256=jFjeVTITbZkwyBJdDaPNmqAWUKoRSW9anPM6ehC118E,67
|
|
5
|
+
src/analyzers/ast_walker.py,sha256=diPpIkkHCUoBc3BajhnQcMEqP9WS6xgkRknb9aNQGhQ,3077
|
|
6
|
+
src/analyzers/diff_parser.py,sha256=BQxARCilVCOEKCCtIueFJ97Pqfcppmi7-1hs6czy660,4597
|
|
7
|
+
src/analyzers/risk_scorer.py,sha256=xorEGUJufNkSIwByI74GEiSIS19WGmwwQ3FwXxqC4wE,4432
|
|
8
|
+
src/briefing/__init__.py,sha256=BbZvpF8rkJCMMa-wHOrP8d6_63sr1gEqBTs_at8GYng,268
|
|
9
|
+
src/briefing/generator.py,sha256=dwgNQQZVPYWUFnqANmjF8D1VWjXDh7DPCXX0SYOuPSI,8524
|
|
10
|
+
src/briefing/prompt_templates.py,sha256=hETpUrnCnFVAUM9N1kBhspRU2f4TVSlJJx0hgYuQj7w,2977
|
|
11
|
+
src/context/__init__.py,sha256=bvCr3d-RGYzl9TJKB45FCsYV-jKqZdLj8kCWnMXD9A4,63
|
|
12
|
+
src/context/codebase_index.py,sha256=BoL43v7gx7WsFKiWWgA3rzMVFqB3r3pDXoCB_W68zTE,12855
|
|
13
|
+
src/context/git_history.py,sha256=xH6-tzQcm98eZgvif8Tdtrn1OpCdy3TUdlqZgLkGlLE,6998
|
|
14
|
+
src/fixes/__init__.py,sha256=AKT9jw7a5t7N4CgExQFXwzkPoVzqMQxjYbwO2asuAKU,77
|
|
15
|
+
src/fixes/confidence.py,sha256=XHCYz-Gvfo1x13iqgsiXoOWpscURbZ_zTlKXU7QmbF4,2362
|
|
16
|
+
src/fixes/fix_generator.py,sha256=wVhWw2mGJyrB6YkAqKXkbzWlXmbgVDDqAtllSrIOES4,5246
|
|
17
|
+
src/github_api/__init__.py,sha256=Yfp6ghTndiB9W8ZwihQtn7sV2j1W2sjEqP7Rt-8_AZ0,119
|
|
18
|
+
src/github_api/comment_poster.py,sha256=MABH2IXgrkxtYdurNKDop-HXWYw8Yo_Pw3cR7wvQ1i8,3606
|
|
19
|
+
src/llm/__init__.py,sha256=-Cq5eXoq-PA8eCXMTVpobExO1WDVoWopDcFp1-XDK1M,3992
|
|
20
|
+
src/llm/anthropic_provider.py,sha256=CiyOy_39NoMeCJtFKnGb6wJq80cTvRuztof3HBm6MDk,1115
|
|
21
|
+
src/llm/base.py,sha256=znllM7OL2-bUuMiQWZXfso4CEgKdw2brnfnCoK2FZSc,380
|
|
22
|
+
src/llm/gemini_provider.py,sha256=Ebt9U5PokIv6zo6TMIocjyqsTF4h5YDfmMbnkmH1zmg,1108
|
|
23
|
+
src/llm/groq_provider.py,sha256=AfxWuHqweiR32A4k9fgI65Hg5rRWnN70YpbF9vpia-g,1024
|
|
24
|
+
src/llm/ollama_provider.py,sha256=8Cuk-XC5wAmtxbEJPu5xtDjKwskVD1Zzdys9cbOlHVw,1338
|
|
25
|
+
pr_context_engine-0.1.0.dist-info/METADATA,sha256=Nu0j59OAfsK0QSdQfAJ5spS3wAbNdrim7q9RlIWjrUg,9135
|
|
26
|
+
pr_context_engine-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
27
|
+
pr_context_engine-0.1.0.dist-info/entry_points.txt,sha256=2oEE7GUgLRI-AEZjdwZNAUWozo0WSZ4CdFWTpY3wBfM,50
|
|
28
|
+
pr_context_engine-0.1.0.dist-info/licenses/LICENSE,sha256=loMg3YX5UJ1jw9YfqlBrQRWiBb60WkMTM6OcwlLheIk,1064
|
|
29
|
+
pr_context_engine-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Kautsar
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
src/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""PR Context Engine — an AI tool that briefs pull requests for human reviewers."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Diff analysis: parsing, symbol extraction, and risk scoring."""
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Extract names of changed functions and classes from a FileChange.
|
|
2
|
+
|
|
3
|
+
Uses Python's `ast` module for Python files and language-specific regexes
|
|
4
|
+
for JavaScript, TypeScript, and Go. Languages without a registered pattern set
|
|
5
|
+
yield no symbols. Only lines that appear in the diff (added or removed) are scanned —
|
|
6
|
+
we report which named symbols were touched, not a full symbol table.
|
|
7
|
+
"""
|
|
8
|
+
import ast
|
|
9
|
+
import logging
|
|
10
|
+
import re
|
|
11
|
+
|
|
12
|
+
from src.analyzers.diff_parser import FileChange
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
# Patterns keyed by language; each pattern has one capturing group: the symbol name.
|
|
17
|
+
_PATTERNS: dict[str, list[re.Pattern[str]]] = {
|
|
18
|
+
"python": [
|
|
19
|
+
re.compile(r"^(?:async\s+)?def\s+([A-Za-z_][A-Za-z0-9_]*)"),
|
|
20
|
+
re.compile(r"^class\s+([A-Za-z_][A-Za-z0-9_]*)"),
|
|
21
|
+
],
|
|
22
|
+
"javascript": [
|
|
23
|
+
re.compile(r"^(?:export\s+)?(?:async\s+)?function\s+([A-Za-z_$][A-Za-z0-9_$]*)"),
|
|
24
|
+
re.compile(r"^(?:export\s+)?class\s+([A-Za-z_$][A-Za-z0-9_$]*)"),
|
|
25
|
+
re.compile(
|
|
26
|
+
r"^(?:export\s+)?(?:const|let|var)\s+([A-Za-z_$][A-Za-z0-9_$]*)"
|
|
27
|
+
r"\s*=\s*(?:async\s+)?(?:function|\()"
|
|
28
|
+
),
|
|
29
|
+
],
|
|
30
|
+
"go": [
|
|
31
|
+
re.compile(r"^func\s+(?:\([^)]+\)\s+)?([A-Za-z_][A-Za-z0-9_]*)"),
|
|
32
|
+
re.compile(r"^type\s+([A-Za-z_][A-Za-z0-9_]*)\s+struct"),
|
|
33
|
+
],
|
|
34
|
+
}
|
|
35
|
+
_PATTERNS["typescript"] = _PATTERNS["javascript"]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _names_via_regex(lines: list[str], language: str) -> list[str]:
    """Collect symbol names from ``lines`` using the patterns for ``language``.

    Each line is stripped of surrounding whitespace and tested against the
    language's patterns in order; the first pattern that matches contributes
    its first capture group. A language with no entry in ``_PATTERNS`` has no
    patterns, so the result is an empty list. Duplicates are kept.
    """
    candidates = _PATTERNS.get(language, [])
    found: list[str] = []
    for raw in lines:
        text = raw.strip()
        # The generator is lazy, so matching stops at the first hit.
        hit = next((m for m in (pat.match(text) for pat in candidates) if m), None)
        if hit is not None:
            found.append(hit.group(1))
    return found
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _names_via_ast(lines: list[str]) -> list[str]:
|
|
52
|
+
"""Try to parse Python lines as a module and extract def/class names at any depth.
|
|
53
|
+
|
|
54
|
+
Returns an empty list on any parse failure — the caller falls back to regex.
|
|
55
|
+
"""
|
|
56
|
+
source = "\n".join(lines)
|
|
57
|
+
try:
|
|
58
|
+
tree = ast.parse(source)
|
|
59
|
+
except SyntaxError:
|
|
60
|
+
return []
|
|
61
|
+
names: list[str] = []
|
|
62
|
+
for node in ast.walk(tree):
|
|
63
|
+
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
|
|
64
|
+
names.append(node.name)
|
|
65
|
+
return names
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def extract_changed_symbols(change: FileChange) -> list[str]:
    """Return the unique function/class names found in this file's diff lines.

    Added lines are scanned first, then removed lines, so a symbol that was
    added, edited, or deleted is reported once, in first-seen order. Python
    files are tried with the AST extractor and fall back to regex matching
    when that yields nothing; every other language uses regex matching only.
    """
    diff_lines = change.added_lines + change.removed_lines

    if change.language != "python":
        found = _names_via_regex(diff_lines, change.language)
    else:
        found = _names_via_ast(diff_lines) or _names_via_regex(diff_lines, "python")

    # dict keys keep insertion order, which gives an order-preserving dedupe.
    return list(dict.fromkeys(found))
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"""Parse a unified diff string into structured FileChange objects."""
|
|
2
|
+
import os
|
|
3
|
+
import re
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
|
|
6
|
+
_EXT_LANG: dict[str, str] = {
|
|
7
|
+
".py": "python",
|
|
8
|
+
".js": "javascript",
|
|
9
|
+
".jsx": "javascript",
|
|
10
|
+
".ts": "typescript",
|
|
11
|
+
".tsx": "typescript",
|
|
12
|
+
".go": "go",
|
|
13
|
+
".rb": "ruby",
|
|
14
|
+
".java": "java",
|
|
15
|
+
".rs": "rust",
|
|
16
|
+
".c": "c",
|
|
17
|
+
".cpp": "cpp",
|
|
18
|
+
".cs": "csharp",
|
|
19
|
+
".php": "php",
|
|
20
|
+
".sh": "shell",
|
|
21
|
+
".yaml": "yaml",
|
|
22
|
+
".yml": "yaml",
|
|
23
|
+
".json": "json",
|
|
24
|
+
".toml": "toml",
|
|
25
|
+
".sql": "sql",
|
|
26
|
+
".md": "markdown",
|
|
27
|
+
".html": "html",
|
|
28
|
+
".css": "css",
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
def detect_language(path: str) -> str:
    """Map a file path to its markdown fence language, or "" when unmapped.

    The lookup uses the lower-cased extension reported by
    ``os.path.splitext`` as the key into ``_EXT_LANG``.
    """
    _, extension = os.path.splitext(path)
    return _EXT_LANG.get(extension.lower(), "")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
_HUNK_RE = re.compile(r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class Hunk:
    """A single ``@@`` section of a unified diff plus its raw body lines."""

    # Start line and line count on the old ("-") side of the hunk header.
    old_start: int
    old_count: int
    # Start line and line count on the new ("+") side of the hunk header.
    new_start: int
    new_count: int
    # Raw body lines, each still carrying its leading "+", "-" or " " marker.
    lines: list[str] = field(default_factory=list)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
class FileChange:
    """Everything a unified diff says about one file."""

    # Path of the file the change applies to.
    path: str
    # Language identifier for the path.
    language: str
    # Added / removed line contents, without the leading "+" / "-" marker.
    added_lines: list[str]
    removed_lines: list[str]
    # The file's hunks, in the order they appear in the diff.
    hunks: list[Hunk]
    # Set when the diff marks the file as newly created / deleted.
    is_new_file: bool = False
    is_deleted_file: bool = False
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _detect_language(path: str) -> str:
    """Return the language for ``path`` or "unknown" when none is mapped.

    Everything after the last "." in the whole path, lower-cased and
    prefixed with a dot, is used as the ``_EXT_LANG`` key.
    """
    _, dot, tail = path.rpartition(".")
    if not dot:
        # No "." anywhere in the path, so there is no extension to look up.
        return "unknown"
    return _EXT_LANG.get(f".{tail.lower()}", "unknown")
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def parse_diff(diff_text: str) -> list[FileChange]:
    """Parse a unified diff string into a list of FileChange objects.

    Handles new files (``--- /dev/null``), deleted files (``+++ /dev/null``),
    and standard modifications, for ``git diff`` output as well as plain
    unified diffs that have no ``diff --git`` separator lines. Each
    FileChange includes per-hunk line data with positional information
    needed by the risk scorer.

    The line counts announced by every ``@@`` header are tracked while the
    hunk body is read. Inside a body a line is classified purely by its
    first character, so a removed ``-- comment`` line (rendered ``--- comment``)
    or an added ``++ x`` line (rendered ``+++ x``) is treated as content, not
    as a file header.

    Args:
        diff_text: The full text of a unified diff.

    Returns:
        One FileChange per ``---``/``+++`` header pair, in diff order. Entries
        that have no such header pair (for example mode-only changes) produce
        no FileChange.
    """
    changes: list[FileChange] = []
    current: FileChange | None = None
    current_hunk: Hunk | None = None
    pending_new = False
    pending_deleted = False
    pending_old_path = ""
    # Old-side / new-side lines the open hunk is still expected to contain.
    old_left = 0
    new_left = 0
    # First characters a hunk body line may have; "" covers blank context
    # lines whose single leading space was stripped by some tool.
    body_markers = ("+", "-", " ", "\\", "")

    def _push_hunk() -> None:
        """Attach the open hunk (if any) to the current file and close it."""
        nonlocal current_hunk, old_left, new_left
        if current is not None and current_hunk is not None:
            current.hunks.append(current_hunk)
        current_hunk = None
        old_left = 0
        new_left = 0

    def _flush_file() -> None:
        """Emit the current file (if any) without touching pending header state."""
        nonlocal current
        _push_hunk()
        if current is not None:
            changes.append(current)
        current = None

    def _push_file() -> None:
        """Emit the current file and reset all per-file header state."""
        nonlocal pending_new, pending_deleted, pending_old_path
        _flush_file()
        pending_new = False
        pending_deleted = False
        pending_old_path = ""

    def _record(body_line: str) -> None:
        """Store one hunk body line and update the remaining line counts."""
        nonlocal old_left, new_left
        if current is None or current_hunk is None:
            return
        current_hunk.lines.append(body_line)
        marker = body_line[:1]
        if marker == "+":
            current.added_lines.append(body_line[1:])
            new_left -= 1
        elif marker == "-":
            current.removed_lines.append(body_line[1:])
            old_left -= 1
        elif marker != "\\":
            # Context line: present on both sides. A "\ No newline at end
            # of file" marker belongs to neither side and is not counted.
            old_left -= 1
            new_left -= 1

    for line in diff_text.splitlines():
        # While the hunk still expects lines, anything that looks like body
        # content IS body content, even if it resembles a file header.
        in_body = current_hunk is not None and (old_left > 0 or new_left > 0)
        if in_body and line[:1] in body_markers:
            _record(line)
            continue

        if line.startswith("diff --git "):
            _push_file()
            continue

        if line.startswith("new file mode"):
            pending_new = True
            continue

        if line.startswith("deleted file mode"):
            pending_deleted = True
            continue

        if line.startswith("--- "):
            if current is not None:
                # Plain unified diff: no "diff --git" line closed the previous
                # file, so close it here before reading the new header pair.
                _push_file()
            raw = line[4:]
            pending_old_path = raw[2:] if raw.startswith("a/") else raw
            if pending_old_path == "/dev/null":
                pending_new = True
            continue

        if line.startswith("+++ "):
            # Keep any file that is still open instead of overwriting it.
            _flush_file()
            raw = line[4:]
            new_path = raw[2:] if raw.startswith("b/") else raw
            if new_path == "/dev/null":
                pending_deleted = True
                new_path = pending_old_path
            current = FileChange(
                path=new_path,
                language=_detect_language(new_path),
                added_lines=[],
                removed_lines=[],
                hunks=[],
                is_new_file=pending_new,
                is_deleted_file=pending_deleted,
            )
            continue

        if line.startswith("@@") and current is not None:
            _push_hunk()
            m = _HUNK_RE.match(line)
            if m:
                # An omitted count in the header means exactly one line.
                current_hunk = Hunk(
                    old_start=int(m.group(1)),
                    old_count=int(m.group(2) or "1"),
                    new_start=int(m.group(3)),
                    new_count=int(m.group(4) or "1"),
                )
                old_left = current_hunk.old_count
                new_left = current_hunk.new_count
            continue

        if current_hunk is not None and current is not None:
            # Lenient fallback for lines past the announced counts, such as
            # a trailing "\ No newline at end of file" marker.
            _record(line)

    _push_file()
    return changes
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""Heuristic risk-flag detection over parsed diff changes.
|
|
2
|
+
|
|
3
|
+
Each flag is a located issue object carrying enough information for Milestone 8's
|
|
4
|
+
fix generator: which file, which line (or None when a specific line doesn't apply),
|
|
5
|
+
and a short snippet. Flags where `line` is None are briefing-only and never fix-eligible.
|
|
6
|
+
"""
|
|
7
|
+
import re
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from src.analyzers.diff_parser import FileChange
|
|
12
|
+
|
|
13
|
+
_AUTH_RE = re.compile(
|
|
14
|
+
# Use letter-only boundaries so compound names like auth_token / AUTH_SECRET match.
|
|
15
|
+
r"(?<![a-zA-Z])(auth|token|password|secret|permission|credential|api_key|apikey)(?![a-zA-Z])",
|
|
16
|
+
re.IGNORECASE,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
# Top-level function/method definition patterns for public-API deletion detection.
|
|
20
|
+
# We only match lines with no leading whitespace (top-level scope).
|
|
21
|
+
_FUNC_DEF_RE = re.compile(
|
|
22
|
+
r"^(?:async\s+)?def\s+([A-Za-z_][A-Za-z0-9_]*)" # Python
|
|
23
|
+
r"|^func\s+(?:\([^)]+\)\s+)?([A-Za-z_][A-Za-z0-9_]*)" # Go
|
|
24
|
+
r"|^export\s+(?:async\s+)?function\s+([A-Za-z_$][A-Za-z0-9_$]*)" # JS/TS named function
|
|
25
|
+
r"|^export\s+(?:const|let)\s+([A-Za-z_$][A-Za-z0-9_$]*)\s*=\s*(?:async\s+)?\(" # JS/TS arrow
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
_MIGRATION_MARKERS = ("migrations/", "alembic/", "alembic_migrations/")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
|
|
32
|
+
class RiskFlag:
|
|
33
|
+
"""A located risk signal from heuristic analysis of a diff.
|
|
34
|
+
|
|
35
|
+
`line` is the line number in the relevant file version:
|
|
36
|
+
- new-file line for `modifies_auth` (the code being added)
|
|
37
|
+
- old-file line for `deletes_public_api` (where the definition existed)
|
|
38
|
+
- None for whole-file flags (`touches_migration`, `changes_config`)
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
flag: str
|
|
42
|
+
file: str
|
|
43
|
+
line: int | None
|
|
44
|
+
snippet: str
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _is_migration(path: str) -> bool:
    """Return True when ``path`` looks like a database migration.

    Matching is case-insensitive: any ``.sql`` file qualifies, as does any
    path containing one of the ``_MIGRATION_MARKERS`` fragments.
    """
    lowered = path.lower()
    if lowered.endswith(".sql"):
        return True
    return any(marker in lowered for marker in _MIGRATION_MARKERS)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _is_config(path: str) -> bool:
|
|
53
|
+
"""True for .env*, config.*, or *.yaml/yml files at the repo root."""
|
|
54
|
+
name = Path(path).name.lower()
|
|
55
|
+
parts = path.replace("\\", "/").split("/")
|
|
56
|
+
at_root = len(parts) == 1
|
|
57
|
+
|
|
58
|
+
if name.startswith(".env"):
|
|
59
|
+
return True
|
|
60
|
+
if name.startswith("config.") and at_root:
|
|
61
|
+
return True
|
|
62
|
+
if name.endswith((".yaml", ".yml")) and at_root:
|
|
63
|
+
return True
|
|
64
|
+
return False
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def score(changes: list[FileChange]) -> list[RiskFlag]:
    """Return all risk flags detected across the list of file changes.

    Per file, a whole-file flag (``line=None``) is emitted for migration
    paths and for config paths. Each hunk is then walked line by line while
    tracking old-file and new-file line numbers:

    - an added line matching ``_AUTH_RE`` yields ``modifies_auth`` at its
      new-file line number;
    - a removed, unindented line matching ``_FUNC_DEF_RE`` yields
      ``deletes_public_api`` at its old-file line number.

    Snippets are the stripped line content, truncated to 200 characters.

    Args:
        changes: Parsed file changes, e.g. the output of ``parse_diff``.

    Returns:
        Flags in discovery order: file by file, whole-file flags first,
        then per-line flags in hunk order.
    """
    flags: list[RiskFlag] = []

    for change in changes:
        if _is_migration(change.path):
            flags.append(
                RiskFlag(flag="touches_migration", file=change.path, line=None, snippet=change.path)
            )

        if _is_config(change.path):
            flags.append(
                RiskFlag(flag="changes_config", file=change.path, line=None, snippet=change.path)
            )

        for hunk in change.hunks:
            new_lineno = hunk.new_start
            old_lineno = hunk.old_start

            for raw in hunk.lines:
                marker, content = raw[:1], raw[1:]

                if marker == "\\":
                    # "\ No newline at end of file" annotates the previous
                    # line; it is not a line of either file version, so it
                    # must not advance the counters.
                    continue

                # Hunk bodies never contain "+++ "/"--- " file headers, so the
                # first character alone decides the line type. This keeps
                # added/removed lines whose content starts with "++"/"--".
                if marker == "+":
                    if _AUTH_RE.search(content):
                        flags.append(
                            RiskFlag(
                                flag="modifies_auth",
                                file=change.path,
                                line=new_lineno,
                                snippet=content.strip()[:200],
                            )
                        )
                    new_lineno += 1

                elif marker == "-":
                    # Only flag top-level (no leading whitespace) function removals
                    if not content[:1].isspace() and _FUNC_DEF_RE.match(content):
                        flags.append(
                            RiskFlag(
                                flag="deletes_public_api",
                                file=change.path,
                                line=old_lineno,
                                snippet=content.strip()[:200],
                            )
                        )
                    old_lineno += 1

                else:
                    # context line — advances both counters
                    new_lineno += 1
                    old_lineno += 1

    return flags
|
src/briefing/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
"""PR briefing generation with senior-engineer-voice prompts and structured output."""
|
|
2
|
+
from src.briefing.generator import Briefing, generate_briefing
|
|
3
|
+
from src.briefing.prompt_templates import SYSTEM_PROMPT
|
|
4
|
+
|
|
5
|
+
__all__ = ["Briefing", "generate_briefing", "SYSTEM_PROMPT"]
|