github_pr_context_mcp-0.2.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analytics/__init__.py +3 -0
- analytics/usage_metrics.py +185 -0
- app/__init__.py +3 -0
- app/mcp_app.py +928 -0
- auth/__init__.py +3 -0
- auth/gmail_identity.py +236 -0
- entrypoints/deployed/server.py +34 -0
- entrypoints/local/server.py +273 -0
- fetcher/__init__.py +3 -0
- fetcher/client.py +131 -0
- fetcher/queries.py +67 -0
- fetcher/transform.py +55 -0
- github_pr_context_mcp-0.2.5.dist-info/METADATA +192 -0
- github_pr_context_mcp-0.2.5.dist-info/RECORD +25 -0
- github_pr_context_mcp-0.2.5.dist-info/WHEEL +5 -0
- github_pr_context_mcp-0.2.5.dist-info/entry_points.txt +2 -0
- github_pr_context_mcp-0.2.5.dist-info/licenses/LICENSE +21 -0
- github_pr_context_mcp-0.2.5.dist-info/top_level.txt +7 -0
- inference/__init__.py +3 -0
- inference/providers.py +296 -0
- inference/review.py +175 -0
- storage/__init__.py +19 -0
- storage/document_builder.py +74 -0
- storage/encoder.py +35 -0
- storage/vector_store.py +270 -0
fetcher/queries.py
ADDED
@@ -0,0 +1,67 @@
# GraphQL query strings only — no HTTP, no transformation logic here.

PR_QUERY = """
query GetPRs($owner: String!, $repo: String!, $cursor: String) {
  repository(owner: $owner, name: $repo) {
    pullRequests(
      last: 30,
      states: [MERGED, CLOSED],
      before: $cursor,
      orderBy: {field: UPDATED_AT, direction: DESC}
    ) {
      pageInfo {
        hasPreviousPage
        startCursor
      }
      nodes {
        number
        title
        body
        author { login }
        createdAt
        mergedAt
        additions
        deletions
        files(first: 100) {
          nodes {
            path
            additions
            deletions
            changeType
          }
        }
        reviewThreads(first: 100) {
          nodes {
            isResolved
            path
            line
            diffHunk
            comments(first: 50) {
              nodes {
                author { login }
                body
                createdAt
              }
            }
          }
        }
        commits(first: 10) {
          nodes {
            commit {
              message
            }
          }
        }
        reviews(first: 50) {
          nodes {
            author { login }
            state
            body
            submittedAt
          }
        }
      }
    }
  }
}
"""
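The wheel's actual HTTP layer lives in `fetcher/client.py`, which this diff view does not expand. For orientation, here is a minimal sketch of how `PR_QUERY` could be executed against GitHub's GraphQL endpoint using `requests` (a declared dependency); `fetch_pr_page` is an illustrative name, not the package's API:

```python
import os

import requests

from fetcher.queries import PR_QUERY


def fetch_pr_page(owner: str, repo: str, cursor: str | None = None) -> dict:
    """Hypothetical caller: post PR_QUERY and return the pullRequests payload."""
    response = requests.post(
        "https://api.github.com/graphql",
        json={
            "query": PR_QUERY,
            "variables": {"owner": owner, "repo": repo, "cursor": cursor},
        },
        headers={"Authorization": f"bearer {os.environ['GITHUB_TOKEN']}"},
        timeout=30,
    )
    response.raise_for_status()
    return response.json()["data"]["repository"]["pullRequests"]
```

Note that the query paginates backwards: it asks for the `last: 30` PRs `before: $cursor`, so a caller would loop by feeding `pageInfo.startCursor` back in as `$cursor` while `hasPreviousPage` is true.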
fetcher/transform.py
ADDED
@@ -0,0 +1,55 @@
# Raw GraphQL response → clean Python dicts.
# No HTTP calls, no ChromaDB, no embedding logic here.

def flatten_pr(raw_pr: dict) -> dict:
    """Convert a single raw GraphQL PR node into a clean, flat dict."""
    review_comments = []
    for thread in raw_pr["reviewThreads"]["nodes"]:
        for comment in thread["comments"]["nodes"]:
            review_comments.append({
                "file": thread["path"],
                "line": thread["line"],
                "resolved": thread["isResolved"],
                "author": comment["author"]["login"] if comment["author"] else "ghost",
                "body": comment["body"],
                "created_at": comment["createdAt"],
                "diff_hunk": thread.get("diffHunk", ""),
            })

    return {
        "number": raw_pr["number"],
        "title": raw_pr["title"],
        "body": raw_pr["body"] or "",
        "author": raw_pr["author"]["login"] if raw_pr["author"] else "ghost",
        "created_at": raw_pr["createdAt"],
        "merged_at": raw_pr["mergedAt"],
        "additions": raw_pr["additions"],
        "deletions": raw_pr["deletions"],
        "files": [
            {
                "path": f["path"],
                "additions": f["additions"],
                "deletions": f["deletions"],
                "change_type": f["changeType"],
            }
            for f in raw_pr["files"]["nodes"]
        ],
        "review_comments": review_comments,
        "commits": [
            {"message": c["commit"]["message"]}
            for c in raw_pr["commits"]["nodes"]
        ],
        "reviews": [
            {
                "author": r["author"]["login"] if r["author"] else "ghost",
                "state": r["state"],
                "body": r["body"] or "",
                "submitted_at": r["submittedAt"],
            }
            for r in raw_pr["reviews"]["nodes"]
        ],
    }

def flatten_prs(nodes: list[dict]) -> list[dict]:
    """Flatten a list of raw GraphQL PR nodes."""
    return [flatten_pr(pr) for pr in nodes]
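A quick worked example of the null handling above, using a hand-trimmed raw node with illustrative values (a real `PR_QUERY` response carries the same keys, plus populated nested nodes):

```python
from fetcher.transform import flatten_pr

# Trimmed raw node in the shape PR_QUERY returns; nested lists left empty here.
raw_node = {
    "number": 42,
    "title": "Fix retry logic",
    "body": None,    # null body is mapped to ""
    "author": None,  # deleted GitHub accounts come back null, mapped to "ghost"
    "createdAt": "2024-01-01T00:00:00Z",
    "mergedAt": "2024-01-02T00:00:00Z",
    "additions": 10,
    "deletions": 2,
    "files": {"nodes": []},
    "reviewThreads": {"nodes": []},
    "commits": {"nodes": []},
    "reviews": {"nodes": []},
}

flat = flatten_pr(raw_node)
assert flat["author"] == "ghost"
assert flat["body"] == ""
assert flat["review_comments"] == []
```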
github_pr_context_mcp-0.2.5.dist-info/METADATA
ADDED
@@ -0,0 +1,192 @@
Metadata-Version: 2.4
Name: github-pr-context-mcp
Version: 0.2.5
Summary: GitHub PR Review Context MCP Server
Author: Paarth Gala
Requires-Python: >=3.10
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: mcp
Requires-Dist: chromadb
Requires-Dist: sentence-transformers
Requires-Dist: python-dotenv
Requires-Dist: requests
Requires-Dist: cerebras-cloud-sdk
Requires-Dist: openai
Requires-Dist: anthropic
Requires-Dist: google-generativeai
Dynamic: license-file

# GitHub PR Review Context MCP

<div align="center">








**Production-grade context layer for AI code review, grounded in your repository's real pull request history.**


> Tracking unique users across **uvx**, **pipx**, and **local** sources. (Render hosting upcoming)

</div>

---

## Overview

GitHub PR Review Context MCP gives AI assistants institutional review memory.

Instead of generic feedback, reviews are informed by historical reviewer comments, recurring quality patterns, and repository-specific standards from your own PR history.

### Core Value

- Improves review consistency across teams and repositories.
- Reduces repeated reviewer feedback on known issues.
- Integrates with any MCP-compatible client and multiple LLM providers.

---

## 🛠️ Usage Modes: Solo vs. Team

This MCP server is built to scale from a single machine to an entire engineering organization.

### 👤 Solo Developer (Local Mode)
**Best for:** Privacy, local-first control, and zero hosting costs.
- **How it works:** Run via `uvx`, `pipx`, or a local git clone.
- **Storage:** ChromaDB stays on your local machine.
- **Security:** Your GitHub token and LLM keys never leave your device.
- **Setup:** See [Quick Start](docs/quickstart.md#🚀-zero-setup-uvx--pipx--npx).

### 🤝 Team Collaboration (Hosted Mode - UPCOMING)
**Best for:** Scaling team-wide PR standards and centralized infra.
- **How it works:** One deployment on Render (Coming Soon) shared by the whole team.
- **Isolation:** Strict **Gmail-based namespace isolation** (driven by SQLite). User A's indexed data is never visible to User B.
- **Economics:** Pooled LLM credits and a single shared indexing server.
- **Setup:** See [Deployment Guide](docs/integrations/deployed.md).

---

### 🌟 Zero-Friction Setup (Upcoming)
If your team has hosted this MCP on Render, you do **NOT** need to `git clone` or install anything. You just drop a snippet into your IDE:

```json
"github-pr-context": {
  "type": "sse",
  "url": "https://YOUR-RENDER-URL.onrender.com/mcp",
  "headers": {
    "Authorization": "Bearer YOUR_TOKEN"
  }
}
```

*That's it.* If your IDE supports native MCP SSE connections, you are immediately connected to the secure Render deployment. No setup friction, no tools required.

---

## Key Capabilities

| Capability | What It Delivers |
|---|---|
| Historical review retrieval | Semantic search across prior PR comments and review summaries |
| Context-aware AI review | Feedback grounded in repository-specific review behavior |
| Grounded code generation | Generate new code based on past commits, comments, and style |
| **Team rules generation** | **Auto-generate .cursorrules / CLAUDE.md from repo history** |
| Smart repository readiness | Auto-detect indexed state and index on demand |
| Flexible storage modes | Permanent (disk) and temporary (in-memory) indexing options |
| Portable inference layer | Switch LLM providers using environment configuration only |

---

## Demo



Example workflow:
- Ask the assistant to review a diff using repository history.
- The server retrieves similar past review context.
- The model returns grounded feedback aligned to team expectations.

## Usage Analytics

To help us understand adoption, the MCP server collects privacy-first, anonymous telemetry on deployments. Future hosted deployments will expose HTTP endpoints (`/stats` and `/ping`) that publicly display the **number of unique users**.

---

## 🧰 Core Tools Reference

The server exposes 12 core tools for IDE agents and developers. For a deep dive on when to use each, see the [**Tool Strategy Guide**](docs/tools_strategy.md).

| Tool | Action |
|---|---|
| `ensure_repo_ready` | Index a repo and ensure it's ready for queries |
| `generate_repo_rules` | **Synthesize .cursorrules / CLAUDE.md from PR history** |
| `generate_code_from_history` | Write code grounded in past commits & team style |
| `review_code_with_history` | Perform AI review grounded in team review memory |
| `get_team_review_patterns` | Summarize recurring team standards (e.g. "no magic numbers") |
| `semantic_search_reviews` | Search past PR comments by meaning, not just keywords |
| `set_active_repo` | Switch between multiple indexed repositories |
| `list_indexed_repos` | View all repos currently in local/temporary storage |
| `delete_repo_index` | Free up disk space by clearing repository indices |
| `get_index_stats` | Verify whether a repo index is complete (doc count) |
| `update_settings` | Update tokens/LLM keys (Hosted mode only) |
| `get_usage_stats` | View adoption metrics and unique user counts |

---

## Documentation

Detailed guides are split into focused pages:

- [Quick Start and Usage](docs/quickstart.md)
- [LLM Configuration](docs/llm-configuration.md)
- [Integrations](docs/integrations/index.md)
- [Architecture and Tools](docs/architecture.md)
- [Pipeline Deep Dive](docs/pipeline.md)
- [Configuration Guide (Change Tokens/Settings)](docs/guides/configuration.md)
- [Roadmap](docs/roadmap.md)

---

## Quick Links

- Access setup: [GitHub Token Guide](docs/GUIDE_GITHUB_TOKEN.md)
- Client connection: [Integrations](docs/integrations/index.md)

---

## 📣 Community & Feedback

We want to hear from you—whether you are a solo developer or a team at a large company!

### 👤 For Individuals
- **Feedback**: Please open an issue or start a discussion if you have ideas or encounter bugs.
- **Show your support**: If this tool saves you time, give it a **Star ⭐**! It helps others find the project.

### 🏢 For Corporate & Teams
- **Usage**: Is your team using this MCP server? Join our "Adopters" list by opening a PR to add your team's name.
- **Corporate Feedback**: Open an issue with the `corporate-usage` label to tell us how this has improved your PR review workflow.
- **Custom Integration**: Need help deploying this to your private cloud? Reach out via GitHub Discussions.

---

## 📜 Documentation & Guides

- **Strategy & Best Practices**: [Tool Strategy & Selection Guide](docs/tools_strategy.md)
- **Architecture**: [Architecture and Tools](docs/architecture.md)
- **Pipeline**: [Pipeline Deep Dive](docs/pipeline.md)
- **Usage**: [Quick Start and Usage](docs/quickstart.md)

## 🛠️ Troubleshooting

- **"command not found"**: Use absolute paths in your configuration. Run `github-pr-context-mcp config` to get your exact path.
- **"PermissionError: [WinError 32]"**: The binary is locked by a running process. Close Claude/Cursor, run `taskkill /F /IM github-pr-context-mcp.exe`, then retry the upgrade.
- **Rate Limit Errors**: Ensure your `GITHUB_TOKEN` is valid and has `repo` scope.

## ⚖️ License

MIT
github_pr_context_mcp-0.2.5.dist-info/RECORD
ADDED
@@ -0,0 +1,25 @@
analytics/__init__.py,sha256=bGt2HZvSi9zx8r84EXYUaK5ACOy0i5_E8U2oE1CyaBs,90
analytics/usage_metrics.py,sha256=Kp78y1hsNouAlyZ8OQ-CVYSCv_X17M6OcXhG3UYefN4,8791
app/__init__.py,sha256=sqeHWMqFLhIETKmsSJrccwQVpvdTrislT6V4g0A98rw,50
app/mcp_app.py,sha256=PpGrpZumfD-xv7hpjvjZhXlScxcDHoCcii6H9Mm-xK0,34289
auth/__init__.py,sha256=ynl-1KLMvJRG-MQij8IBI3-gLXKHJF4yQCA4stiOh24,172
auth/gmail_identity.py,sha256=eAr0XQowOnX0X7-nxcUMvnpD6oOmSc6GGBa4_o-aQlI,8925
entrypoints/deployed/server.py,sha256=1HwlLLi-1_9OLA-CzylOwTmmC2DUygQZpi6xSPw8wzs,761
entrypoints/local/server.py,sha256=NNxs96lrVOhZPqQu5yUpNUtWHHqyTiNW1IHQEP-D9J4,11195
fetcher/__init__.py,sha256=Ds51hEct0obY0SM0xPbZgd2BSLHeFHFSTSZp8jQsLK8,62
fetcher/client.py,sha256=fhbpjp0Te9PGc9g85WPhrtlkfKQnwXhEr-Luowkq-k8,4574
fetcher/queries.py,sha256=H2i5nULQJDJWBRlXETQCF3QSxu8whV5HXieldY-WG9I,1345
fetcher/transform.py,sha256=_fQ7y74Ou9LR5KfknzS7S9Yl3YCE6j4sv6yrSzP095E,2042
github_pr_context_mcp-0.2.5.dist-info/licenses/LICENSE,sha256=M4TB72oBDWxvebDG6nolZTQVKRe-cVrdC_8JJSFVGic,1068
inference/__init__.py,sha256=4lPbvKJw0vixkpHjWdHd1PWVX5tJiaa5wHFF1SS7TUk,224
inference/providers.py,sha256=ishiUiDU_vemU6fYU45DEoCvc1NXWGrgZbD1Qc4rGls,10912
inference/review.py,sha256=o0jqJx9xEio9vxAOJy0DSmTUDnJYHhshvolQ7gnAAcg,6503
storage/__init__.py,sha256=ueQibOr9NqA_slWfCTQ47apkzLjQYu_Q6nC1ZYcnofs,404
storage/document_builder.py,sha256=-CQgtE1VgjkXiRDL0xptoKJ7uqBkRjyCUi_y4gBWvCo,2685
storage/encoder.py,sha256=CBx-xPkFYfApA9mHRuYKh_fa6-7VA7CBE24TF0fejHs,1141
storage/vector_store.py,sha256=cAdzshP-cQ7w2mUOePUMNfWmWpokUUrFASr6B9IETGU,9592
github_pr_context_mcp-0.2.5.dist-info/METADATA,sha256=P4OmHFpHgpNS-_OYxdLaQ1dFgaKqvFI-Zfn6N5gECjo,7896
github_pr_context_mcp-0.2.5.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
github_pr_context_mcp-0.2.5.dist-info/entry_points.txt,sha256=_tVIQ9b41eiaNOOAOYKx5eteC80MLN0V7apxFZBBI_0,72
github_pr_context_mcp-0.2.5.dist-info/top_level.txt,sha256=2m7n-NQrzlzfMlSk3nhopmv_PQPDq0d6MK1SceIULMM,57
github_pr_context_mcp-0.2.5.dist-info/RECORD,,
github_pr_context_mcp-0.2.5.dist-info/licenses/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2026 Paarth Gala

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
inference/__init__.py
ADDED
inference/providers.py
ADDED
@@ -0,0 +1,296 @@
# Unified LLM provider adapter.
# Supports: cerebras | openai | anthropic | ollama | groq | gemini
# Configured entirely via environment variables — no code changes needed to switch.

import os
import re
import time
from datetime import datetime, timezone
from dotenv import load_dotenv

load_dotenv()

LLM_PROVIDER = os.getenv("LLM_PROVIDER", "cerebras").lower()
LLM_MODEL = os.getenv("LLM_MODEL", "llama3.1-8b")
LLM_BASE_URL = os.getenv("LLM_BASE_URL", "")  # Required only for Ollama

# OpenAI-compatible providers — share the same client interface
_OPENAI_COMPATIBLE = {"cerebras", "openai", "ollama", "groq"}
_RATE_LIMIT_PATTERN = re.compile(
    r"rate limit|too many requests|quota exceeded|429",
    re.IGNORECASE,
)


def _effective_settings(settings: dict | None = None) -> dict[str, str]:
    settings = settings or {}
    provider = str(settings.get("llm_provider") or LLM_PROVIDER).strip().lower()
    model = str(settings.get("llm_model") or LLM_MODEL).strip()
    api_key = str(settings.get("llm_api_key") or "").strip()
    base_url = str(settings.get("llm_base_url") or LLM_BASE_URL).strip()
    return {
        "llm_provider": provider,
        "llm_model": model,
        "llm_api_key": api_key,
        "llm_base_url": base_url,
    }


def chat(
    messages: list[dict],
    system: str = "",
    max_tokens: int = 1024,
    settings: dict | None = None,
) -> str:
    """
    Unified chat completion across all supported providers.

    Args:
        messages: List of {"role": "user"|"assistant", "content": str}
        system: Optional system prompt string
        max_tokens: Max tokens to generate

    Returns:
        The assistant's reply as a string.
    """
    try:
        effective = _effective_settings(settings)
        provider = effective["llm_provider"]

        if provider in _OPENAI_COMPATIBLE:
            return _openai_compatible(messages, system, max_tokens, effective)
        elif provider == "anthropic":
            return _anthropic(messages, system, max_tokens, effective)
        elif provider == "gemini":
            return _gemini(messages, system, max_tokens, effective)
        else:
            raise ValueError(
                f"Unknown LLM_PROVIDER: '{provider}'. "
                "Valid options: cerebras, openai, anthropic, ollama, groq, gemini"
            )
    except Exception as error:
        retry_message = _format_rate_limit_message(error, settings=settings)
        if retry_message:
            raise RuntimeError(retry_message) from error
        raise


# ── OpenAI-compatible (Cerebras, OpenAI, Ollama, Groq) ───────────────────────

def _build_openai_client(settings: dict[str, str]):
    """Return the right OpenAI-compatible client for the configured provider."""
    provider = settings["llm_provider"]
    if provider == "cerebras":
        try:
            from cerebras.cloud.sdk import Cerebras
            return Cerebras(api_key=_require_key("CEREBRAS_API_KEY", "cloud.cerebras.ai", settings))
        except ImportError:
            raise ImportError("Run: pip install cerebras-cloud-sdk")

    elif provider == "groq":
        try:
            from openai import OpenAI
            return OpenAI(
                api_key=_require_key("GROQ_API_KEY", "console.groq.com/keys", settings),
                base_url="https://api.groq.com/openai/v1",
            )
        except ImportError:
            raise ImportError("Run: pip install openai")

    elif provider == "ollama":
        try:
            from openai import OpenAI
            base_url = settings.get("llm_base_url") or "http://localhost:11434/v1"
            return OpenAI(base_url=base_url, api_key="ollama")
        except ImportError:
            raise ImportError("Run: pip install openai")

    else:  # openai
        try:
            from openai import OpenAI
            return OpenAI(api_key=_require_key("OPENAI_API_KEY", "platform.openai.com", settings))
        except ImportError:
            raise ImportError("Run: pip install openai")


def _openai_compatible(messages: list[dict], system: str, max_tokens: int, settings: dict[str, str]) -> str:
    client = _build_openai_client(settings)
    full_messages = (
        [{"role": "system", "content": system}] if system else []
    ) + messages

    response = client.chat.completions.create(
        model=settings["llm_model"],
        max_tokens=max_tokens,
        messages=full_messages,
    )
    return response.choices[0].message.content


# ── Anthropic ──────────────────────────────────────────────────────────────────

def _anthropic(messages: list[dict], system: str, max_tokens: int, settings: dict[str, str]) -> str:
    try:
        import anthropic
    except ImportError:
        raise ImportError("Run: pip install anthropic")

    client = anthropic.Anthropic(
        api_key=_require_key("ANTHROPIC_API_KEY", "console.anthropic.com", settings)
    )
    kwargs = {"model": settings["llm_model"], "max_tokens": max_tokens, "messages": messages}
    if system:
        kwargs["system"] = system

    response = client.messages.create(**kwargs)
    return response.content[0].text


# ── Gemini ────────────────────────────────────────────────────────────────────

def _gemini(messages: list[dict], system: str, max_tokens: int, settings: dict[str, str]) -> str:
    try:
        import google.generativeai as genai
    except ImportError:
        raise ImportError("Run: pip install google-generativeai")

    genai.configure(api_key=_require_key("GEMINI_API_KEY", "aistudio.google.com", settings))

    model = genai.GenerativeModel(
        model_name=settings["llm_model"],
        system_instruction=system or None,
    )

    # Convert OpenAI-style message list to Gemini's format
    gemini_history = []
    last_user_message = ""

    for msg in messages:
        role = "user" if msg["role"] == "user" else "model"
        if msg["role"] == "user":
            last_user_message = msg["content"]
        else:
            gemini_history.append({"role": role, "parts": [msg["content"]]})

    chat_session = model.start_chat(history=gemini_history)
    response = chat_session.send_message(
        last_user_message,
        generation_config=genai.GenerationConfig(max_output_tokens=max_tokens),
    )
    return response.text


# ── Helper ─────────────────────────────────────────────────────────────────────

def _require_key(env_var: str, signup_url: str, settings: dict[str, str] | None = None) -> str:
    """
    Get an API key from env. Checks the provider-specific var first,
    then falls back to the universal LLM_API_KEY so users only need
    to change one value when switching providers.
    """
    settings = settings or {}
    value = settings.get("llm_api_key") or os.getenv(env_var) or os.getenv("LLM_API_KEY")
    if not value:
        raise EnvironmentError(
            f"No API key found. Set either '{env_var}' or 'LLM_API_KEY' in your .env file.\n"
            f"Get your key at: {signup_url}"
        )
    return value


def _format_rate_limit_message(error: Exception, settings: dict | None = None) -> str | None:
    """Turn provider rate-limit errors into a reset-time hint."""
    if not _looks_like_rate_limit(error):
        return None

    retry_after_seconds, reset_at_text = _rate_limit_reset_hint(error)
    provider_name = _effective_settings(settings)["llm_provider"].capitalize()

    if retry_after_seconds is not None:
        retry_hours = max(retry_after_seconds / 3600, 0.0)
        if retry_hours < 1:
            retry_minutes = max(round(retry_after_seconds / 60), 1)
            return (
                f"{provider_name} rate limit reached. Try again after about "
                f"{retry_minutes} minutes."
            )
        return (
            f"{provider_name} rate limit reached. Try again after about "
            f"{retry_hours:.1f} hours."
        )

    if reset_at_text:
        return (
            f"{provider_name} rate limit reached. Limit resets at {reset_at_text}. "
            "Try again after that reset time."
        )

    return (
        f"{provider_name} rate limit reached. Check the provider dashboard for "
        "when the quota resets and try again after that."
    )


def _looks_like_rate_limit(error: Exception) -> bool:
    """Detect common provider quota and throttling failures."""
    response = getattr(error, "response", None)
    status_code = getattr(response, "status_code", None)
    if status_code == 429:
        return True

    response_headers = getattr(response, "headers", None) or {}
    if any(key.lower() in {"retry-after", "x-ratelimit-reset"} for key in response_headers):
        return True

    return bool(_RATE_LIMIT_PATTERN.search(str(error)))


def _rate_limit_reset_hint(error: Exception) -> tuple[int | None, str | None]:
    """Extract retry timing hints from the provider exception when available."""
    header_sources = []

    response = getattr(error, "response", None)
    if response is not None:
        headers = getattr(response, "headers", None)
        if headers:
            header_sources.append(headers)

    headers = getattr(error, "headers", None)
    if headers:
        header_sources.append(headers)

    retry_after = None
    reset_at = None

    for header_map in header_sources:
        for key, value in header_map.items():
            normalized_key = key.lower()
            if normalized_key == "retry-after" and retry_after is None:
                retry_after = value
            elif normalized_key == "x-ratelimit-reset" and reset_at is None:
                reset_at = value

    retry_after_seconds = _coerce_retry_after_seconds(retry_after)
    if retry_after_seconds is not None:
        return retry_after_seconds, None

    reset_seconds = _coerce_retry_after_seconds(reset_at)
    if reset_seconds is not None:
        remaining_seconds = int(round(reset_seconds - time.time()))
        if remaining_seconds > 0:
            reset_time = datetime.fromtimestamp(reset_seconds, tz=timezone.utc)
            reset_text = reset_time.strftime("%Y-%m-%d %H:%M UTC")
            return remaining_seconds, reset_text

    return None, None


def _coerce_retry_after_seconds(value: object) -> int | None:
    """Convert a retry/reset header into seconds when possible."""
    if value is None:
        return None

    try:
        return int(float(str(value).strip()))
    except (TypeError, ValueError):
        return None
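For orientation, here is a hypothetical calling sketch for `chat()` (the prompts and model names below are placeholders, not from the package). It shows the two resolution paths `_effective_settings` supports: environment-driven defaults and a per-call `settings` override:

```python
from inference.providers import chat

# Path 1: everything resolves from the environment
# (LLM_PROVIDER / LLM_MODEL, plus LLM_API_KEY or the provider-specific key).
reply = chat(
    messages=[{"role": "user", "content": "Summarize this diff in one line."}],
    system="You are a concise code reviewer.",
    max_tokens=256,
)

# Path 2: per-call override, e.g. routing one request to a local Ollama model;
# no API key is needed here because the Ollama branch hardcodes a dummy key.
reply = chat(
    messages=[{"role": "user", "content": "Same question, local model."}],
    settings={
        "llm_provider": "ollama",
        "llm_model": "llama3.1",
        "llm_base_url": "http://localhost:11434/v1",
    },
)

# Rate-limit failures are re-raised as RuntimeError with a reset hint, e.g.
# "Cerebras rate limit reached. Try again after about 12 minutes."
```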