github-pr-context-mcp 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fetcher/queries.py ADDED
@@ -0,0 +1,67 @@
1
# GraphQL query strings only — no HTTP, no transformation logic here.

# Cursor-paginated fetch of merged/closed PRs, newest-updated first.
# Pulls everything the indexer needs in one round-trip: PR metadata,
# changed files, review threads with their comments, commit messages,
# and top-level reviews. Page backwards via $cursor + pageInfo.startCursor.
# NOTE(review): files/reviewThreads/comments are capped (100/100/50) with
# no inner pagination — very large PRs will be truncated; confirm acceptable.
PR_QUERY = """
query GetPRs($owner: String!, $repo: String!, $cursor: String) {
  repository(owner: $owner, name: $repo) {
    pullRequests(
      last: 30,
      states: [MERGED, CLOSED],
      before: $cursor,
      orderBy: {field: UPDATED_AT, direction: DESC}
    ) {
      pageInfo {
        hasPreviousPage
        startCursor
      }
      nodes {
        number
        title
        body
        author { login }
        createdAt
        mergedAt
        additions
        deletions
        files(first: 100) {
          nodes {
            path
            additions
            deletions
            changeType
          }
        }
        reviewThreads(first: 100) {
          nodes {
            isResolved
            path
            line
            diffHunk
            comments(first: 50) {
              nodes {
                author { login }
                body
                createdAt
              }
            }
          }
        }
        commits(first: 10) {
          nodes {
            commit {
              message
            }
          }
        }
        reviews(first: 50) {
          nodes {
            author { login }
            state
            body
            submittedAt
          }
        }
      }
    }
  }
}
"""
fetcher/transform.py ADDED
@@ -0,0 +1,55 @@
1
+ # Raw GraphQL response → clean Python dicts.
2
+ # No HTTP calls, no ChromaDB, no embedding logic here.
3
+
4
+ def flatten_pr(raw_pr: dict) -> dict:
5
+ """Convert a single raw GraphQL PR node into a clean, flat dict."""
6
+ review_comments = []
7
+ for thread in raw_pr["reviewThreads"]["nodes"]:
8
+ for comment in thread["comments"]["nodes"]:
9
+ review_comments.append({
10
+ "file": thread["path"],
11
+ "line": thread["line"],
12
+ "resolved": thread["isResolved"],
13
+ "author": comment["author"]["login"] if comment["author"] else "ghost",
14
+ "body": comment["body"],
15
+ "created_at": comment["createdAt"],
16
+ "diff_hunk": thread.get("diffHunk", ""),
17
+ })
18
+
19
+ return {
20
+ "number": raw_pr["number"],
21
+ "title": raw_pr["title"],
22
+ "body": raw_pr["body"] or "",
23
+ "author": raw_pr["author"]["login"] if raw_pr["author"] else "ghost",
24
+ "created_at": raw_pr["createdAt"],
25
+ "merged_at": raw_pr["mergedAt"],
26
+ "additions": raw_pr["additions"],
27
+ "deletions": raw_pr["deletions"],
28
+ "files": [
29
+ {
30
+ "path": f["path"],
31
+ "additions": f["additions"],
32
+ "deletions": f["deletions"],
33
+ "change_type": f["changeType"],
34
+ }
35
+ for f in raw_pr["files"]["nodes"]
36
+ ],
37
+ "review_comments": review_comments,
38
+ "commits": [
39
+ {"message": c["commit"]["message"]}
40
+ for c in raw_pr["commits"]["nodes"]
41
+ ],
42
+ "reviews": [
43
+ {
44
+ "author": r["author"]["login"] if r["author"] else "ghost",
45
+ "state": r["state"],
46
+ "body": r["body"] or "",
47
+ "submitted_at": r["submittedAt"],
48
+ }
49
+ for r in raw_pr["reviews"]["nodes"]
50
+ ],
51
+ }
52
+
53
def flatten_prs(nodes: list[dict]) -> list[dict]:
    """Flatten a list of raw GraphQL PR nodes via `flatten_pr`."""
    flattened: list[dict] = []
    for raw_node in nodes:
        flattened.append(flatten_pr(raw_node))
    return flattened
@@ -0,0 +1,192 @@
1
+ Metadata-Version: 2.4
2
+ Name: github-pr-context-mcp
3
+ Version: 0.2.5
4
+ Summary: GitHub PR Review Context MCP Server
5
+ Author: Paarth Gala
6
+ Requires-Python: >=3.10
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ Requires-Dist: mcp
10
+ Requires-Dist: chromadb
11
+ Requires-Dist: sentence-transformers
12
+ Requires-Dist: python-dotenv
13
+ Requires-Dist: requests
14
+ Requires-Dist: cerebras-cloud-sdk
15
+ Requires-Dist: openai
16
+ Requires-Dist: anthropic
17
+ Requires-Dist: google-generativeai
18
+ Dynamic: license-file
19
+
20
+ # GitHub PR Review Context MCP
21
+
22
+ <div align="center">
23
+
24
+ ![Python](https://img.shields.io/badge/Python-3.10%2B-blue?logo=python&logoColor=white)
25
+ ![Protocol](https://img.shields.io/badge/Protocol-MCP-green)
26
+ ![Data Source](https://img.shields.io/badge/Data-GitHub%20PR%20History-black?logo=github)
27
+ ![Vector Store](https://img.shields.io/badge/Storage-ChromaDB-orange)
28
+ ![Inference](https://img.shields.io/badge/LLM-Multi--Provider-brightgreen)
29
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
30
+ ![Status](https://img.shields.io/badge/Render%20Hosting-Upcoming-gray)
31
+
32
+ **Production-grade context layer for AI code review, grounded in your repository's real pull request history.**
33
+
34
+
35
+ > Tracking unique users across **uvx**, **pipx**, and **local** sources. (Render hosting upcoming)
36
+
37
+ </div>
38
+
39
+ ---
40
+
41
+ ## Overview
42
+
43
+ GitHub PR Review Context MCP gives AI assistants institutional review memory.
44
+
45
+ Instead of generic feedback, reviews are informed by historical reviewer comments, recurring quality patterns, and repository-specific standards from your own PR history.
46
+
47
+ ### Core Value
48
+
49
+ - Improves review consistency across teams and repositories.
50
+ - Reduces repeated reviewer feedback on known issues.
51
+ - Integrates with any MCP-compatible client and multiple LLM providers.
52
+
53
+ ---
54
+
55
+ ## 🛠️ Usage Modes: Solo vs. Team
56
+
57
+ This MCP server is built to scale from a single machine to an entire engineering organization.
58
+
59
+ ### 👤 Solo Developer (Local Mode)
60
+ **Best for:** Privacy, local-first control, and zero hosting costs.
61
+ - **How it works:** Run via `uvx`, `pipx`, or a local git clone.
62
+ - **Storage:** ChromaDB stays on your local machine.
63
+ - **Security:** Your GitHub Token and LLM keys never leave your device.
64
+ - **Setup:** See [Quick Start](docs/quickstart.md#🚀-zero-setup-uvx--pipx--npx).
65
+
66
+ ### 🤝 Team Collaboration (Hosted Mode - UPCOMING)
67
+ **Best for:** Scaling team-wide PR standards and centralized infra.
68
+ - **How it works:** One deployment on Render (Coming Soon) shared by the whole team.
69
+ - **Isolation:** Strict **Gmail-based namespace isolation** (driven by SQLite). User A's indexed data is completely inaccessible to User B.
70
+ - **Economics:** Pooled LLM credits and a single shared indexing server.
71
+ - **Setup:** See [Deployment Guide](docs/integrations/deployed.md).
72
+
73
+ ---
74
+
75
+ ### 🌟 Zero-Friction Setup (Upcoming)
76
+ If your team hosts this MCP on Render, you do **NOT** need to `git clone` or install anything. You just drop a snippet into your IDE:
77
+
78
+ ```json
79
+ "github-pr-context": {
80
+ "type": "sse",
81
+ "url": "https://YOUR-RENDER-URL.onrender.com/mcp",
82
+ "headers": {
83
+ "Authorization": "Bearer YOUR_TOKEN"
84
+ }
85
+ }
86
+ ```
87
+ *That's it.* If your IDE supports native MCP SSE connections, you are immediately connected to the secure Render deployment. No setup friction, no tools required.
88
+
89
+ ---
90
+
91
+ ## Key Capabilities
92
+
93
+ | Capability | What It Delivers |
94
+ |---|---|
95
+ | Historical review retrieval | Semantic search across prior PR comments and review summaries |
96
+ | Context-aware AI review | Feedback grounded in repository-specific review behavior |
97
+ | Grounded code generation | Generate new code based on past commits, comments, and style |
98
+ | **Team rules generation** | **Auto-generate .cursorrules / CLAUDE.md from repo history** |
99
+ | Smart repository readiness | Auto-detect indexed state and index on demand |
100
+ | Flexible storage modes | Permanent (disk) and temporary (in-memory) indexing options |
101
+ | Portable inference layer | Switch LLM providers using environment configuration only |
102
+
103
+ ---
104
+
105
+ ## Demo
106
+
107
+ ![demo](assets/demo.gif)
108
+
109
+ Example workflow:
110
+ - Ask the assistant to review a diff using repository history.
111
+ - The server retrieves similar past review context.
112
+ - The model returns grounded feedback aligned to team expectations.
113
+
114
+ ## Usage Analytics
115
+
116
+ To help us understand adoption, the MCP server collects privacy-first, anonymous telemetry on deployments. Future hosted deployments will expose HTTP endpoints (`/stats` and `/ping`) that publicly display the **number of unique users**.
117
+
118
+ ---
119
+
120
+ ## 🧰 Core Tools Reference
121
+
122
+ The server exposes 12 core tools for IDE agents and developers. For a deep dive on when to use each, see the [**Tool Strategy Guide**](docs/tools_strategy.md).
123
+
124
+ | Tool | Action |
125
+ |---|---|
126
+ | `ensure_repo_ready` | Index a repo and ensure it's ready for queries |
127
+ | `generate_repo_rules` | **Synthesize .cursorrules / CLAUDE.md from PR history** |
128
+ | `generate_code_from_history`| Write code grounded in past commits & team style |
129
+ | `review_code_with_history` | Perform AI review grounded in team review memory |
130
+ | `get_team_review_patterns` | Summarize recurring team standards (e.g. "no magic numbers") |
131
+ | `semantic_search_reviews` | Search past PR comments by meaning, not just keywords |
132
+ | `set_active_repo` | Switch between multiple indexed repositories |
133
+ | `list_indexed_repos` | View all repos currently in local/temporary storage |
134
+ | `delete_repo_index` | Free up disk space by clearing repository indices |
135
+ | `get_index_stats` | Verify if a repo index is complete (doc count) |
136
+ | `update_settings` | Update tokens/LLM keys (Hosted mode only) |
137
+ | `get_usage_stats` | View adoption metrics and unique user counts |
138
+
139
+ ---
140
+
141
+ ## Documentation
142
+
143
+ Detailed guides are split into focused pages:
144
+
145
+ - [Quick Start and Usage](docs/quickstart.md)
146
+ - [LLM Configuration](docs/llm-configuration.md)
147
+ - [Integrations](docs/integrations/index.md)
148
+ - [Architecture and Tools](docs/architecture.md)
149
+ - [Pipeline Deep Dive](docs/pipeline.md)
150
+ - [Configuration Guide (Change Tokens/Settings)](docs/guides/configuration.md)
151
+ - [Roadmap](docs/roadmap.md)
152
+
153
+ ---
154
+
155
+ ## Quick Links
156
+
157
+ - Access setup: [GitHub Token Guide](docs/GUIDE_GITHUB_TOKEN.md)
158
+ - Client connection: [Integrations](docs/integrations/index.md)
159
+
160
+ ---
161
+
162
+ ## 📣 Community & Feedback
163
+
164
+ We want to hear from you—whether you are a solo developer or a team at a large company!
165
+
166
+ ### 👤 For Individuals
167
+ - **Feedback**: Please open an issue or start a discussion if you have ideas or encounter bugs.
168
+ - **Show your support**: If this tool saves you time, give it a **Star ⭐**! It helps others find the project.
169
+
170
+ ### 🏢 For Corporate & Teams
171
+ - **Usage**: Is your team using this MCP server? Join our "Adopters" list by opening a PR to add your team's name.
172
+ - **Corporate Feedback**: Open an issue with the `corporate-usage` label to tell us how this has improved your PR review workflow.
173
+ - **Custom Integration**: Need help deploying this to your private cloud? Reach out via GitHub Discussions.
174
+
175
+ ---
176
+
177
+ ## 📜 Documentation & Guides
178
+
179
+ - **Strategy & Best Practices**: [Tool Strategy & Selection Guide](docs/tools_strategy.md)
180
+ - **Architecture**: [Architecture and Tools](docs/architecture.md)
181
+ - **Pipeline**: [Pipeline Deep Dive](docs/pipeline.md)
182
+ - **Usage**: [Quick Start and Usage](docs/quickstart.md)
183
+
184
+ ## 🛠️ Troubleshooting
185
+
186
+ - **"command not found"**: Use absolute paths in your configuration. Run `github-pr-context-mcp config` to get your exact path.
187
+ - **"PermissionError: [WinError 32]"**: The binary is locked by a running process. Close Claude/Cursor, run `taskkill /F /IM github-pr-context-mcp.exe`, then retry the upgrade.
188
+ - **Rate Limit Errors**: Ensure your `GITHUB_TOKEN` is valid and has `repo` scope.
189
+
190
+ ## ⚖️ License
191
+
192
+ MIT
@@ -0,0 +1,25 @@
1
+ analytics/__init__.py,sha256=bGt2HZvSi9zx8r84EXYUaK5ACOy0i5_E8U2oE1CyaBs,90
2
+ analytics/usage_metrics.py,sha256=Kp78y1hsNouAlyZ8OQ-CVYSCv_X17M6OcXhG3UYefN4,8791
3
+ app/__init__.py,sha256=sqeHWMqFLhIETKmsSJrccwQVpvdTrislT6V4g0A98rw,50
4
+ app/mcp_app.py,sha256=PpGrpZumfD-xv7hpjvjZhXlScxcDHoCcii6H9Mm-xK0,34289
5
+ auth/__init__.py,sha256=ynl-1KLMvJRG-MQij8IBI3-gLXKHJF4yQCA4stiOh24,172
6
+ auth/gmail_identity.py,sha256=eAr0XQowOnX0X7-nxcUMvnpD6oOmSc6GGBa4_o-aQlI,8925
7
+ entrypoints/deployed/server.py,sha256=1HwlLLi-1_9OLA-CzylOwTmmC2DUygQZpi6xSPw8wzs,761
8
+ entrypoints/local/server.py,sha256=NNxs96lrVOhZPqQu5yUpNUtWHHqyTiNW1IHQEP-D9J4,11195
9
+ fetcher/__init__.py,sha256=Ds51hEct0obY0SM0xPbZgd2BSLHeFHFSTSZp8jQsLK8,62
10
+ fetcher/client.py,sha256=fhbpjp0Te9PGc9g85WPhrtlkfKQnwXhEr-Luowkq-k8,4574
11
+ fetcher/queries.py,sha256=H2i5nULQJDJWBRlXETQCF3QSxu8whV5HXieldY-WG9I,1345
12
+ fetcher/transform.py,sha256=_fQ7y74Ou9LR5KfknzS7S9Yl3YCE6j4sv6yrSzP095E,2042
13
+ github_pr_context_mcp-0.2.5.dist-info/licenses/LICENSE,sha256=M4TB72oBDWxvebDG6nolZTQVKRe-cVrdC_8JJSFVGic,1068
14
+ inference/__init__.py,sha256=4lPbvKJw0vixkpHjWdHd1PWVX5tJiaa5wHFF1SS7TUk,224
15
+ inference/providers.py,sha256=ishiUiDU_vemU6fYU45DEoCvc1NXWGrgZbD1Qc4rGls,10912
16
+ inference/review.py,sha256=o0jqJx9xEio9vxAOJy0DSmTUDnJYHhshvolQ7gnAAcg,6503
17
+ storage/__init__.py,sha256=ueQibOr9NqA_slWfCTQ47apkzLjQYu_Q6nC1ZYcnofs,404
18
+ storage/document_builder.py,sha256=-CQgtE1VgjkXiRDL0xptoKJ7uqBkRjyCUi_y4gBWvCo,2685
19
+ storage/encoder.py,sha256=CBx-xPkFYfApA9mHRuYKh_fa6-7VA7CBE24TF0fejHs,1141
20
+ storage/vector_store.py,sha256=cAdzshP-cQ7w2mUOePUMNfWmWpokUUrFASr6B9IETGU,9592
21
+ github_pr_context_mcp-0.2.5.dist-info/METADATA,sha256=P4OmHFpHgpNS-_OYxdLaQ1dFgaKqvFI-Zfn6N5gECjo,7896
22
+ github_pr_context_mcp-0.2.5.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
23
+ github_pr_context_mcp-0.2.5.dist-info/entry_points.txt,sha256=_tVIQ9b41eiaNOOAOYKx5eteC80MLN0V7apxFZBBI_0,72
24
+ github_pr_context_mcp-0.2.5.dist-info/top_level.txt,sha256=2m7n-NQrzlzfMlSk3nhopmv_PQPDq0d6MK1SceIULMM,57
25
+ github_pr_context_mcp-0.2.5.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ github-pr-context-mcp = entrypoints.local.server:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Paarth Gala
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,7 @@
1
+ analytics
2
+ app
3
+ auth
4
+ entrypoints
5
+ fetcher
6
+ inference
7
+ storage
inference/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from inference.review import review_with_context, summarize_patterns, generate_with_context, generate_rules_content
2
+
3
+ __all__ = ["review_with_context", "summarize_patterns", "generate_with_context", "generate_rules_content"]
inference/providers.py ADDED
@@ -0,0 +1,296 @@
1
# Unified LLM provider adapter.
# Supports: cerebras | openai | anthropic | ollama | groq | gemini
# Configured entirely via environment variables — no code changes needed to switch.

import os
import re
import time
from datetime import datetime, timezone
from dotenv import load_dotenv

# Load .env at import time so the module-level defaults below see user config.
load_dotenv()

# Module-level defaults; per-call `settings` dicts override these (see
# `_effective_settings`).
LLM_PROVIDER = os.getenv("LLM_PROVIDER", "cerebras").lower()
LLM_MODEL = os.getenv("LLM_MODEL", "llama3.1-8b")
LLM_BASE_URL = os.getenv("LLM_BASE_URL", "")  # Required only for Ollama

# OpenAI-compatible providers — share the same client interface
_OPENAI_COMPATIBLE = {"cerebras", "openai", "ollama", "groq"}
# Text heuristic for spotting rate-limit failures when no HTTP status/headers
# are attached to the exception (used by `_looks_like_rate_limit`).
_RATE_LIMIT_PATTERN = re.compile(
    r"rate limit|too many requests|quota exceeded|429",
    re.IGNORECASE,
)
23
+
24
+
25
def _effective_settings(settings: dict | None = None) -> dict[str, str]:
    """Merge per-call overrides with the module-level env defaults.

    Returns a normalized dict (stripped strings; provider lowercased) with
    the keys llm_provider / llm_model / llm_api_key / llm_base_url.
    """
    overrides = settings or {}
    return {
        "llm_provider": str(overrides.get("llm_provider") or LLM_PROVIDER).strip().lower(),
        "llm_model": str(overrides.get("llm_model") or LLM_MODEL).strip(),
        # No env fallback here on purpose: key resolution happens later in
        # `_require_key`, which checks provider-specific env vars too.
        "llm_api_key": str(overrides.get("llm_api_key") or "").strip(),
        "llm_base_url": str(overrides.get("llm_base_url") or LLM_BASE_URL).strip(),
    }
37
+
38
+
39
def chat(
    messages: list[dict],
    system: str = "",
    max_tokens: int = 1024,
    settings: dict | None = None,
) -> str:
    """
    Unified chat completion across all supported providers.

    Args:
        messages: List of {"role": "user"|"assistant", "content": str}
        system: Optional system prompt string
        max_tokens: Max tokens to generate
        settings: Optional per-call overrides (provider/model/key/base_url)

    Returns:
        The assistant's reply as a string.

    Raises:
        ValueError: for an unrecognized provider name.
        RuntimeError: when the provider reports a rate limit (with a
            human-readable retry hint); other provider errors re-raise as-is.
    """
    try:
        effective = _effective_settings(settings)
        provider = effective["llm_provider"]

        # Resolve the provider to its handler, then dispatch once.
        if provider in _OPENAI_COMPATIBLE:
            handler = _openai_compatible
        elif provider == "anthropic":
            handler = _anthropic
        elif provider == "gemini":
            handler = _gemini
        else:
            raise ValueError(
                f"Unknown LLM_PROVIDER: '{provider}'. "
                "Valid options: cerebras, openai, anthropic, ollama, groq, gemini"
            )
        return handler(messages, system, max_tokens, effective)
    except Exception as error:
        # Rate-limit failures get rewrapped with a friendly retry hint;
        # everything else propagates unchanged.
        hint = _format_rate_limit_message(error, settings=settings)
        if hint:
            raise RuntimeError(hint) from error
        raise
76
+
77
+
78
+ # ── OpenAI-compatible (Cerebras, OpenAI, Ollama, Groq) ───────────────────────
79
+
80
def _build_openai_client(settings: dict[str, str]):
    """Return the right OpenAI-compatible client for the configured provider.

    Cerebras uses its own SDK; Groq and Ollama reuse the `openai` client
    pointed at their endpoints; anything else falls through to plain OpenAI.
    Raises ImportError with an install hint when the needed SDK is missing,
    and EnvironmentError (via `_require_key`) when no API key is configured.
    """
    provider = settings["llm_provider"]
    if provider == "cerebras":
        try:
            from cerebras.cloud.sdk import Cerebras
            return Cerebras(api_key=_require_key("CEREBRAS_API_KEY", "cloud.cerebras.ai", settings))
        except ImportError:
            raise ImportError("Run: pip install cerebras-cloud-sdk")

    elif provider == "groq":
        try:
            from openai import OpenAI
            # Groq exposes an OpenAI-compatible REST surface at this base URL.
            return OpenAI(
                api_key=_require_key("GROQ_API_KEY", "console.groq.com/keys", settings),
                base_url="https://api.groq.com/openai/v1",
            )
        except ImportError:
            raise ImportError("Run: pip install openai")

    elif provider == "ollama":
        try:
            from openai import OpenAI
            # Local Ollama needs no real key; "ollama" is a placeholder.
            base_url = settings.get("llm_base_url") or "http://localhost:11434/v1"
            return OpenAI(base_url=base_url, api_key="ollama")
        except ImportError:
            raise ImportError("Run: pip install openai")

    else:  # openai
        try:
            from openai import OpenAI
            return OpenAI(api_key=_require_key("OPENAI_API_KEY", "platform.openai.com", settings))
        except ImportError:
            raise ImportError("Run: pip install openai")
114
+
115
+
116
def _openai_compatible(messages: list[dict], system: str, max_tokens: int, settings: dict[str, str]) -> str:
    """Run a chat completion through any OpenAI-compatible client.

    Prepends the optional system prompt as a system-role message, since
    these providers take it inline with the message list.
    """
    client = _build_openai_client(settings)

    payload = list(messages)
    if system:
        payload.insert(0, {"role": "system", "content": system})

    response = client.chat.completions.create(
        model=settings["llm_model"],
        max_tokens=max_tokens,
        messages=payload,
    )
    return response.choices[0].message.content
128
+
129
+
130
+ # ── Anthropic ──────────────────────────────────────────────────────────────────
131
+
132
def _anthropic(messages: list[dict], system: str, max_tokens: int, settings: dict[str, str]) -> str:
    """Chat completion via the native Anthropic Messages API.

    Unlike the OpenAI-compatible path, the system prompt is passed as a
    dedicated `system` kwarg rather than a system-role message.
    """
    try:
        import anthropic
    except ImportError:
        raise ImportError("Run: pip install anthropic")

    client = anthropic.Anthropic(
        api_key=_require_key("ANTHROPIC_API_KEY", "console.anthropic.com", settings)
    )
    kwargs = {"model": settings["llm_model"], "max_tokens": max_tokens, "messages": messages}
    if system:
        kwargs["system"] = system

    response = client.messages.create(**kwargs)
    # First content block carries the text reply.
    return response.content[0].text
147
+
148
+
149
+ # ── Gemini ────────────────────────────────────────────────────────────────────
150
+
151
def _gemini(messages: list[dict], system: str, max_tokens: int, settings: dict[str, str]) -> str:
    """Chat completion via Google Gemini.

    Converts the OpenAI-style message list into Gemini's chat format:
    all turns except the trailing user message become `start_chat` history,
    and the trailing user message is delivered with `send_message`.

    Bug fixed: the previous conversion appended only assistant turns to the
    history and kept just the *last* user message, silently discarding every
    earlier user turn in multi-turn conversations.
    """
    try:
        import google.generativeai as genai
    except ImportError:
        raise ImportError("Run: pip install google-generativeai")

    genai.configure(api_key=_require_key("GEMINI_API_KEY", "aistudio.google.com", settings))

    model = genai.GenerativeModel(
        model_name=settings["llm_model"],
        system_instruction=system or None,
    )

    # Map OpenAI roles onto Gemini's ("user" stays; anything else → "model"),
    # keeping every turn so prior context is preserved.
    gemini_history = [
        {"role": "user" if msg["role"] == "user" else "model", "parts": [msg["content"]]}
        for msg in messages
    ]

    # The final user turn is sent as the prompt; everything before it is history.
    last_user_message = ""
    if gemini_history and gemini_history[-1]["role"] == "user":
        last_user_message = gemini_history.pop()["parts"][0]

    chat_session = model.start_chat(history=gemini_history)
    response = chat_session.send_message(
        last_user_message,
        generation_config=genai.GenerationConfig(max_output_tokens=max_tokens),
    )
    return response.text
181
+
182
+
183
+ # ── Helper ─────────────────────────────────────────────────────────────────────
184
+
185
+ def _require_key(env_var: str, signup_url: str, settings: dict[str, str] | None = None) -> str:
186
+ """
187
+ Get an API key from env. Checks the provider-specific var first,
188
+ then falls back to the universal LLM_API_KEY so users only need
189
+ to change one value when switching providers.
190
+ """
191
+ settings = settings or {}
192
+ value = settings.get("llm_api_key") or os.getenv(env_var) or os.getenv("LLM_API_KEY")
193
+ if not value:
194
+ raise EnvironmentError(
195
+ f"No API key found. Set either '{env_var}' or 'LLM_API_KEY' in your .env file.\n"
196
+ f"Get your key at: {signup_url}"
197
+ )
198
+ return value
199
+
200
+
201
def _format_rate_limit_message(error: Exception, settings: dict | None = None) -> str | None:
    """Turn provider rate-limit errors into a reset-time hint.

    Returns None when `error` doesn't look like a rate limit; otherwise a
    human-readable message naming the provider and, when the exception
    carried timing headers, roughly how long to wait.
    """
    if not _looks_like_rate_limit(error):
        return None

    wait_seconds, reset_at_text = _rate_limit_reset_hint(error)
    provider_name = _effective_settings(settings)["llm_provider"].capitalize()
    prefix = f"{provider_name} rate limit reached."

    # Best case: we know how long to wait — phrase it in minutes or hours.
    if wait_seconds is not None:
        wait_hours = max(wait_seconds / 3600, 0.0)
        if wait_hours < 1:
            wait_minutes = max(round(wait_seconds / 60), 1)
            return f"{prefix} Try again after about {wait_minutes} minutes."
        return f"{prefix} Try again after about {wait_hours:.1f} hours."

    # Next best: we know the absolute reset time.
    if reset_at_text:
        return f"{prefix} Limit resets at {reset_at_text}. Try again after that reset time."

    # No timing info at all — point the user at the provider dashboard.
    return (
        f"{prefix} Check the provider dashboard for "
        "when the quota resets and try again after that."
    )
232
+
233
+
234
+ def _looks_like_rate_limit(error: Exception) -> bool:
235
+ """Detect common provider quota and throttling failures."""
236
+ response = getattr(error, "response", None)
237
+ status_code = getattr(response, "status_code", None)
238
+ if status_code == 429:
239
+ return True
240
+
241
+ response_headers = getattr(response, "headers", None) or {}
242
+ if any(key.lower() in {"retry-after", "x-ratelimit-reset"} for key in response_headers):
243
+ return True
244
+
245
+ return bool(_RATE_LIMIT_PATTERN.search(str(error)))
246
+
247
+
248
def _rate_limit_reset_hint(error: Exception) -> tuple[int | None, str | None]:
    """Extract retry timing hints from the provider exception when available.

    Returns (seconds_to_wait, reset_time_text):
      - (seconds, None) when a Retry-After style delay was found,
      - (remaining_seconds, "YYYY-MM-DD HH:MM UTC") when only an absolute
        X-RateLimit-Reset epoch timestamp was found,
      - (None, None) when no usable timing info is attached to the error.
    """
    # Headers may live on error.response.headers (HTTP client errors) or
    # directly on the error object, depending on the SDK; check both.
    header_sources = []

    response = getattr(error, "response", None)
    if response is not None:
        headers = getattr(response, "headers", None)
        if headers:
            header_sources.append(headers)

    headers = getattr(error, "headers", None)
    if headers:
        header_sources.append(headers)

    retry_after = None
    reset_at = None

    # First occurrence of each header wins, scanning sources in order.
    for header_map in header_sources:
        for key, value in header_map.items():
            normalized_key = key.lower()
            if normalized_key == "retry-after" and retry_after is None:
                retry_after = value
            elif normalized_key == "x-ratelimit-reset" and reset_at is None:
                reset_at = value

    # Retry-After (a relative delay in seconds) takes precedence.
    # NOTE(review): assumes numeric Retry-After; the HTTP-date form of this
    # header would fail to parse and fall through — confirm acceptable.
    retry_after_seconds = _coerce_retry_after_seconds(retry_after)
    if retry_after_seconds is not None:
        return retry_after_seconds, None

    # X-RateLimit-Reset is treated as an absolute UTC epoch timestamp.
    reset_seconds = _coerce_retry_after_seconds(reset_at)
    if reset_seconds is not None:
        remaining_seconds = int(round(reset_seconds - time.time()))
        if remaining_seconds > 0:
            reset_time = datetime.fromtimestamp(reset_seconds, tz=timezone.utc)
            reset_text = reset_time.strftime("%Y-%m-%d %H:%M UTC")
            return remaining_seconds, reset_text

    return None, None
286
+
287
+
288
+ def _coerce_retry_after_seconds(value: object) -> int | None:
289
+ """Convert a retry/reset header into seconds when possible."""
290
+ if value is None:
291
+ return None
292
+
293
+ try:
294
+ return int(float(str(value).strip()))
295
+ except (TypeError, ValueError):
296
+ return None